module Store where -- (get_data, process_data, Store,storeWords,queryStore) where import qualified Data.Map as Map import Types import Data.Maybe(isJust) import Tok import Data.Char(isAlphaNum,toLower) import Data.ErrM import RegExp import Data.List import qualified Data.Lex import qualified Data.Par import qualified Data.Abs data Left = L [Token] deriving (Show) data Right = R [Token] deriving (Show) type Store = Map.Map String [(Left,Right)] get_data :: String -> Options -> Either [UToken] [[Token]] get_data s op = -- map (process_tokens op) $ if is_structured s then parse_data s op else tokenize_data s op remove_monad :: Err a -> a remove_monad (Bad s) = error s remove_monad (Ok a) = a parse_data :: String -> Options -> Either [UToken] [[Token]] parse_data s op = Right $ case remove_monad (Data.Par.pInput (Data.Par.myLexer s)) of (Data.Abs.Input tss) -> [-- process_tokens op (map one_tok ts) | (Data.Abs.D ts) <- tss] where one_tok (Data.Abs.T word ps) = (word,map create_patt ps) create_patt p = case p of (Data.Abs.PC (Data.Abs.Ident a) ps) -> Types.P a (map create_patt ps) (Data.Abs.Id (Data.Abs.Ident i)) -> Types.PId i {- process_tokens :: Options -> [Token] -> [Token] process_tokens opts [] = [] process_tokens opts (t@(s,xs):ts) | nosymb_b && isPunctS s || isNumber s = process_tokens opts ts | uncap_b = (map Data.Char.toLower s,xs):process_tokens opts ts | otherwise = t:process_tokens opts ts where nosymb_b = elem "-nosymb" opts uncap_b = elem "-uncap" opts get_data_sentences :: String -> [String] get_data_sentences s = get_d s [] True where get_d [] s _ = [reverse s] get_d (c:cs) s b = case c of '\\' -> case cs of ('\"':cs) -> get_d cs ('"':c:s) b _ -> get_d cs (c:s) b '\"' -> get_d cs (c:s) (not b) '{' | b -> get_d cs "{" b '}' | b -> (reverse (c:s)):get_d cs [] b c -> get_d cs (c:s) b get_sentences :: String -> [String] get_sentences xs = gets xs [] where gets [] s = [reverse s] gets (c:cs) s | isMajor c = (reverse (c:s)):gets cs [] | otherwise = gets cs (c:s) isMajor c = elem c ".?!" -} tokenize_data :: String -> Options -> Either [UToken] [[Token]] tokenize_data s op | elem "-c" op = Right $ sentences $ lines s -- $ tokens op s | otherwise = Left $ lines s where sentences [] = [] sentences ts = case span (\t -> t /= "") ts of (ts,[]) -> [zip ts (repeat [])] (ts,r@(_:_)) -> (zip ts (repeat [])):sentences r --line_it :: [String] -> [[Token]] --line_it [] = [] --line_it xs = case span (/= ".") xs of -- ([],[]) -> [] -- (ts,[]) -> [zip ts (repeat [])] -- (ts,(_:ys)) -> (zip ts (repeat [])):line_it ys is_structured :: String -> Bool is_structured ('{':cs) = True is_structured (c:cs) | isAlphaNum c = False | otherwise = is_structured cs -- The input is a list of sentences. -- I.e. a sentence is a list of tokens. -- A token is a word annotated with an ambiguity class. process_data :: Either [UToken] [[Token]] -> (Boundary, Boundary) -> Store process_data (Right xs) bs = foldr (process bs) empty_store xs process_data (Left xs) bs = process_u xs empty_store :: Store empty_store = Map.empty process_u :: [UToken] -> Store process_u ts = Map.fromList [(t,[]) | t <- ts] process :: (Boundary, Boundary) -> [Token] -> Store -> Store process (lb,rs) ts st = foldr ins st (context ts) where ins (k,(p,a)) st = Map.insertWith f k [(L (appBound lb (reverse p) 0),R (appBound rs a 1))] st f [x] xs = (x:xs) context xs = [ case splitAt n xs of (p,(s@(key,_):a)) -> (key,(p,(s:a))) | n <- [0 .. (length xs-1)]] appBound None _ _ = [] appBound Unlimited xs _ = xs appBound (BSize n) xs n1 = take (n+n1) xs storeWords :: Store -> [String] storeWords st = Map.keys st type InHead = Bool queryStore :: Store -> String -> (Maybe Constraint) -> InHead -> Maybe [[(Variable,String)]] queryStore st s (Nothing) b = if (b || (isJust (Map.lookup s st))) then Just [[]] else Nothing queryStore st s (Just cs) b = case Map.lookup s st of Nothing | b -> case check_constraint cs (L [],R [(s,[])]) of Nothing -> Nothing Just xs -> Just [xs] -- To be able to match constructed word forms. Nothing -> Nothing Just cxts -> case [xs | (Just xs) <- (map (check_constraint cs) cxts)] of [] -> Nothing xss -> Just xss storeRemove :: Store -> String -> Store storeRemove st s = Map.delete s st storeSize :: Store -> Int storeSize st = Map.size st check_constraint :: Constraint -> (Left,Right) -> Maybe [(Variable,String)] check_constraint cs (L ls,R rs) = case evaluate_constraint cs (ls,rs) [] of Right (xs,_) -> xs Left v -> var_error v evaluate_constraint :: Constraint -> ([Token],[Token]) -> [(Var,Maybe Pos)] -> Either Var (Maybe [(Variable,String)], [(Var,Maybe Pos)]) evaluate_constraint (Atom ((Just p),reg,patt)) (ls,rs) env = check_position p reg patt (ls,rs) env evaluate_constraint (Atom (Nothing,_,_)) _ _ = error "position must be specified" evaluate_constraint l cxt env = case l of Conj l1 l2 -> case traverse_left_or_right l1 l2 cxt env of (b1,b2,env') -> case (b1,b2) of (Just xs,Just ys) -> Right (Just (xs++ys), env') _ -> Right (Nothing, env') Disj l1 l2 -> case traverse_left_or_right l1 l2 cxt env of (b1,b2,env') -> case (b1,b2) of (Just xs,Just ys) -> Right (Just (xs++ys), env') (Just xs,_) -> Right (Just xs,env') (_,Just xs) -> Right (Just xs,env') _ -> Right (Nothing,env') Neg l1 -> case evaluate_constraint l1 cxt env of Left v -> var_error v Right (b,env') -> case b of Just [] -> Right (Nothing, env') Nothing -> Right (Just [], env') -- Variable binding may occur in left or in right branch of the expression, so we have to visit -- both before we know that a variable is undefined. traverse_left_or_right :: Constraint -> Constraint -> ([Token],[Token]) -> [(Var,Maybe Pos)] -> (Maybe [(Variable,String)], Maybe [(Variable,String)], [(Var,Maybe Pos)]) traverse_left_or_right l1 l2 cxt env = case evaluate_constraint l1 cxt env of Left _ -> case evaluate_constraint l2 cxt env of Left v -> var_error v Right (xs,new_env) -> case evaluate_constraint l1 cxt new_env of Left v -> var_error v Right (ys,new_env') -> (xs,ys,new_env') Right (xs,new_env) -> case evaluate_constraint l2 cxt new_env of Left v -> var_error v Right (ys,new_env') -> (xs,ys,new_env') -- Found an unidentified variable. Should be treat more gracefully than -- with 'error'. Preferably by an initial type check. var_error :: String -> a var_error v = error $ "Unknown variable in constraint: " ++ v -- manage the positioning of the constraint check_position :: Position -> Maybe Reg -> Maybe (Unique,Patt String) -> ([Token],[Token]) -> [(Var,Maybe Pos)] -> Either Var (Maybe [(Variable,String)], [(Var,Maybe Pos)]) check_position (Reference v pos) reg patt (ls,rs) env = case (lookup v env) of Nothing -> Left v Just (Just p) -> Right (check_token (find_token p pos (ls,rs)) reg patt,env) Just Nothing -> Right (Nothing,env) check_position (Relative p) reg patt (ls,rs) env = Right (check_token (find_token p 0 (ls,rs)) reg patt,env) check_position (Spanning p var) reg patt (ls,rs) env = case roaming (if p>=0 then zip (drop p rs) [p..] else zip (drop ((abs p)-1) ls) (map negate [(abs p)..])) reg patt of Nothing -> Right (Nothing, addEnv var (Nothing) env) Just (xs,pos) -> Right (Just xs, addEnv var (Just pos) env) check_position (VarSpanning v p_rel var) reg patt (ls,rs) env = case lookup v env of Nothing -> Left v Just Nothing -> Right (Nothing,env) Just (Just p_ref) -> let p = p_ref+p_rel in case roaming (if p>=0 then zip (drop p rs) [p..] else zip (drop ((abs p)-1) ls) (map negate [(abs p)..])) reg patt of Nothing -> Right (Nothing, addEnv var (Nothing) env) Just (xs,pos) -> Right (Just xs, addEnv var (Just pos) env) -- Match a token against, possibly, a regular expression and a pattern. check_token :: Maybe Token -> Maybe Reg -> Maybe (Unique,Patt String) -> Maybe [(Variable,String)] check_token Nothing reg patt = Nothing check_token (Just t) reg patt = case check_reg t reg of res@(Just xs) | check_patt t patt -> res _ -> Nothing -- Retrieve tok at position specified by both of input Positions from context. find_token :: Pos -> Pos -> ([Token],[Token]) -> Maybe Token find_token p pos (ls,rs) = case ((pos+p),length ls, length rs) of (p',ll,rl) | p' < 0 && (abs p') <= ll -> return $ ls !! ((abs p') - 1) (p',ll,rl) | p' >= 0 && p' < rl -> return $ rs !! (p') otherwise -> Nothing -- if reg is '_', it is trivially true, otherwise, match with token string. check_reg :: Token -> Maybe Reg -> Maybe [(Variable,String)] check_reg _ Nothing = Just [] check_reg (w,_) (Just r) = matchReg r w -- if pattern is '_', it is trivially true, -- otherwise, all patterns in current token is match pattern. check_patt :: Token -> Maybe (Unique,Patt String) -> Bool check_patt _ Nothing = True check_patt (w,ps) (Just (False,patt)) = or $ map (match_patt patt) ps check_patt (w,[p]) (Just (True,patt)) = match_patt patt p check_patt _ _ = False -- match two patterns. match_patt :: Patt String -> Patt String -> Bool match_patt (P a ps) (P b ps1) = if length ps == length ps1 then a == b && and [match_patt p1 p2 | (p1,p2) <- zip ps ps1] else False match_patt PW _ = True match_patt p1 p2 = p1 == p2 -- unbounded reference. -- This is now first hit. Should perhaps be any hit. roaming :: [(Token,Pos)] -> Maybe Reg -> Maybe (Unique,Patt String) -> Maybe ([(Variable,String)],Pos) roaming [] _ _ = Nothing roaming ((x,p):xs) reg patt = case check_reg x reg of (Just xs) | check_patt x patt -> return (xs,p) _ -> roaming xs reg patt -- position reference table. Note that a variable can be defined, but lacking position, -- since roaming can fail. addEnv :: Maybe Var -> Maybe Pos -> [(Var,Maybe Pos)] -> [(Var,Maybe Pos)] addEnv Nothing _ env = env addEnv (Just v) pos env = (v,pos):env