# -*- coding: utf-8 -*-
'''
Created on Oct 21, 2016

Compound analysis for Swedish words using SPyRo (Swedish Python Routines).
SPyRo has been upgraded from 2 to 3.

@author: David
'''
from lib.splitter import Splitter
from lib.saldo import SALDO

# Initialize saldo
saldo = SALDO()

# Initialize compound splitter
splitter = Splitter(saldo)


def compounds(word, pos):
    '''
    Split a word into compound segments using the SPyRo/SALDO splitter.

    Every candidate split returned by ``splitter.split`` is kept unless
    ``splitter.is_semantic_compound`` reports it as False.

    NOTE(review): the meaning of the three-valued result below is taken
    from the original author's own (unconfirmed) notes -- verify against
    the Splitter implementation:
      * True  -- presumably all segments are semantic words
      * None  -- presumably the status could not be determined
      * False -- presumably at least one segment is not a word

    Parameters:
        word: The word to analyse. Text is used as-is; a byte string is
            decoded with the 'unicode-escape' codec first.
        pos:  Tag forwarded unchanged to the splitter (the demo below
            passes "NN"; presumably a part-of-speech tag).

    Returns:
        A list of the candidate segmentations that were not rejected.
        The list is empty when the splitter returns None.
    '''
    # Python 3 text has no .decode(); only byte strings need decoding.
    # Calling .decode() unconditionally raised AttributeError for str input.
    if isinstance(word, bytes):
        word = word.decode('unicode-escape')

    candidates = splitter.split(word, pos)
    if candidates is None:
        return []

    # Keep candidates judged True or None; drop only explicit False.
    return [
        segments
        for segments in candidates
        if splitter.is_semantic_compound(word, segments, pos) in (True, None)
    ]


if __name__ == '__main__':
    print(compounds("blodtrycksproblem", "NN"))