# -*- python -*- # ...that is: cython import sys import cython from libc.stdlib cimport malloc, free, realloc, calloc from libc.string cimport memcpy, memmove cdef extern from "inttypes.h": ctypedef int int32_t ctypedef unsigned int weightint ctypedef unsigned short int ruleint ctypedef int32_t hashint ctypedef unsigned int maskint ctypedef unsigned int chars_lengthint # Node = namedtuple('n',['cost','hash','bitmaps','rule','parent']) cdef struct node: weightint cost hashint hash maskint *bitmaps chars_lengthint bitmaps_length ruleint rule node *parent # Rule = namedtuple('r',['hash','cost','bitmaps']) cdef struct rule: hashint hash weightint cost maskint *bitmaps chars_lengthint bitmaps_length # Items in a priority queue cdef struct pqi: node *value pqi *next # Priority queue type cdef struct cpq: weightint current weightint size pqi **data ## The counting priority queue implementation # # Since priorities are integers, during programme execution only # decrease (=increase in value) and have a max value, we use an array # of length max, which we walk down until empty. Each cell in the # array holds a linked list with items of that priority. # # def init(length): # return [()]*length cdef cpq *init(weightint size) except NULL: cdef cpq *p = malloc(sizeof(cpq)) p.data = calloc(sizeof(pqi *),size) # NB! all cells should init to NULL if p == NULL: raise MemoryError('Cannot init priority queue') elif p.data == NULL: free(p) raise MemoryError('Cannot init priority queue data') p.size = size p.current = 0 return p # cdef walk(pq): # for i in xrange(len(pq)): # while pq[i]: # y = pq[i][0] # pq[i] = pq[i][1] # yield y # Unlike the py implementation, we here have the returned item in the # second argument slot and return a value indicating success or not (=empty queue) cdef bint walk(cpq *p, pqi **item): cdef weightint i if p.data[p.current]: item[0] = p.data[p.current] p.data[p.current] = p.data[p.current].next return 1 else: for i in range(p.current,p.size): if p.data[i]: item[0] = p.data[i] p.data[i] = p.data[i].next p.current = i return 1 item[0] = NULL return 0 # def add(pq,k,v): # pq[k] = (v,pq[k]) # Assuming we only send in a valid pointer to the queue, this cannot # fail?? cdef void add(cpq *p, weightint i, pqi *item): item.next = p.data[i] p.data[i] = item ## Locating the next applicable rule # def nextrule(node,rules,threshold): # r = node.rule # while 1: # if r >= len(rules): # return len(rules), [] # if rules[r].cost+node.cost > threshold: # return len(rules),[] # bitmaps = [] # for bitmap in sorted(nb|rb for nb in node.bitmaps for rb in rules[r].bitmaps if not nb&rb): # for bitmap0 in bitmaps: # if (bitmap0 | bitmap) == bitmap: # break # else: # bitmaps.append(bitmap) # if bitmaps: # return r, bitmaps # r += 1 cdef ruleint nextrule(node *node, rule *rules, ruleint rules_length, weightint threshold, maskint **bitmaps, chars_lengthint *bitmaps_length) except? 0: # rule 0 is only used once at beginning, after that: error code cdef maskint bitmap cdef chars_lengthint i, j, bitmaps0_length cdef maskint bitmaps0[50000] cdef ruleint r = node.rule while 1: if r >= rules_length: bitmaps[0] = NULL bitmaps_length[0] = 0 return rules_length if rules[r].cost + node.cost > threshold: bitmaps[0] = NULL bitmaps_length[0] = 0 return rules_length bitmaps0_length = 0 for i in range(node.bitmaps_length): for j in range(rules[r].bitmaps_length): if not node.bitmaps[i] & rules[r].bitmaps[j]: bitmaps0_length += insert(node.bitmaps[i] | rules[r].bitmaps[j],bitmaps0,bitmaps0_length) if bitmaps0_length >= 49999: raise IndexError('Too many bitmaps...') # bitmaps0[bitmaps0_length] = node.bitmaps[i] | rules[r].bitmaps[j] # bitmaps0_length += 1 if bitmaps0_length: bitmaps[0] = malloc(sizeof(maskint)*bitmaps0_length) bitmaps_length[0] = bitmaps0_length if bitmaps[0] == NULL: bitmaps_length[0] = 0 raise MemoryError('Cannot alloc bitmap list of length %s' % bitmaps0_length) memcpy(bitmaps[0],bitmaps0,sizeof(maskint)*bitmaps0_length) return r r += 1 cdef chars_lengthint insert(maskint bitmap, maskint *bitmaps,chars_lengthint bitmaps_length): cdef chars_lengthint i for i in range(bitmaps_length): if bitmap | bitmaps[i] == bitmap: return 0 elif bitmap < bitmaps[i]: memmove(&bitmaps[i+1],&bitmaps[i],(bitmaps_length-i)*sizeof(maskint)) bitmaps[i] = bitmap return 1 bitmaps[bitmaps_length] = bitmap return 1 ## Main generator, interface with python # def dijkstrafind(rules,anahash,_,threshold): # beyond_rules = len(rules) # rules = [Rule(*rule[0:3]) for rule in rules] # # Corresponding to Fig 1 of Ah & Bo (2012). # q = init(threshold+1) # 1 # u0 = Node(cost=0,hash=0,bitmaps=[],rule=len(rules)+1,parent=None) # 2 # add(q,0,Node(cost=0,hash=anahash,bitmaps=[0],rule=0,parent=u0)) # 3, 4 # for i,v in enumerate(walk(q)): # 5, 6 # yield v.hash,v.cost,i # 7 # r, wbitmaps = nextrule(v,rules,threshold) # 8(, 10) # if r < beyond_rules: # 8 # w = Node(cost=v.cost+rules[r].cost, # 10 # hash=v.hash+rules[r].hash, # 10 # bitmaps=wbitmaps, # 10 # rule=r, # 10 # parent=v._replace(rule=r+1,parent=None)) # 9,10 # add(q,w.cost,w) # 11 # u = v.parent # 13 # r, wbitmaps = nextrule(u,rules,threshold) # 14(, 16) # if r < beyond_rules: # 14 # w = Node(u.cost+rules[r].cost, # 16 # u.hash+rules[r].hash, # 16 # bitmaps=wbitmaps, # 16 # rule=r, # 16 # parent=u._replace(rule=r+1)) # 15, 16 # add(q,w.cost,w) # 17 def dijkstrafind(list pyrules,maskint anahash,_,weightint threshold, max_iterations): cdef ruleint rules_length = len(pyrules) cdef ruleint i,r cdef rule *rules = malloc(sizeof(rule)*rules_length) cdef chars_lengthint j cdef int NODECOUNT = 0 cdef int MAXNODECOUNT = 0 for i in range(rules_length): rules[i].hash = pyrules[i][0] rules[i].cost = pyrules[i][1] rules[i].bitmaps_length = len(pyrules[i][2]) rules[i].bitmaps = malloc(sizeof(maskint)*rules[i].bitmaps_length) if rules[i].bitmaps == NULL: raise MemoryError('Cannot alloc rules...') for j in range(rules[i].bitmaps_length): rules[i].bitmaps[j] = pyrules[i][2][j] cdef cpq *q = init(threshold+1) cdef node *u0 = malloc(sizeof(node)) NODECOUNT += 1 u0.cost = 0 u0.hash = 0 u0.bitmaps = calloc(1,sizeof(maskint)) # more convenient than NULL u0.bitmaps_length = 0 u0.rule = rules_length u0.parent = NULL cdef node *u1 = malloc(sizeof(node)) NODECOUNT += 1 u1.cost = 0 u1.hash = anahash u1.bitmaps = calloc(1,sizeof(maskint)) u1.bitmaps_length = 1 u1.rule = 0 u1.parent = u0 cdef pqi *u1_pqi = malloc(sizeof(pqi)) u1_pqi.value = u1 u1_pqi.next = NULL add(q,0,u1_pqi) u0 = NULL u1 = NULL cdef int iteration = 0 cdef pqi *w_pqi = NULL cdef pqi *v_pqi = NULL cdef node *w = NULL cdef node *v = NULL cdef node *u = NULL cdef maskint *wbitmaps = NULL cdef chars_lengthint wbitmaps_length = 0 while walk(q,&v_pqi): v = v_pqi.value u = v.parent free(v_pqi) try: yield v.hash, v.cost, iteration except GeneratorExit: break if NODECOUNT > max_iterations: #print # print 'NB! Max nodecount reached, giving up on this word...(%d)' % NODECOUNT break iteration += 1 r = nextrule(v,rules,rules_length,threshold,&wbitmaps,&wbitmaps_length) if r < rules_length: w = malloc(sizeof(node)) NODECOUNT += 1 w.cost = v.cost+rules[r].cost w.hash = v.hash+rules[r].hash w.bitmaps = wbitmaps w.bitmaps_length = wbitmaps_length w.rule = r w.parent = v v.parent = NULL v.rule += 1 w_pqi = malloc(sizeof(pqi)) w_pqi.value = w w_pqi.next = NULL add(q,w.cost,w_pqi) else: free(v.bitmaps) free(v) v = NULL NODECOUNT -= 1 r = nextrule(u,rules,rules_length,threshold,&wbitmaps,&wbitmaps_length) if r < rules_length: w = malloc(sizeof(node)) NODECOUNT += 1 w.cost = u.cost+rules[r].cost w.hash = u.hash+rules[r].hash w.bitmaps = wbitmaps w.bitmaps_length = wbitmaps_length w.rule = r w.parent = u u.rule += 1 w_pqi = malloc(sizeof(pqi)) w_pqi.value = w w_pqi.next = NULL add(q,w.cost,w_pqi) else: free(u.bitmaps) free(u) u = NULL NODECOUNT -= 1 # cleanup if not v==NULL: free(v.bitmaps) free(v) v = NULL NODECOUNT -= 1 if not u==NULL: # u = v.parent free(u.bitmaps) free(u) u = NULL NODECOUNT -= 1 while walk(q,&v_pqi): if not v_pqi == NULL: if not v_pqi.value == NULL: if not v_pqi.value.parent == NULL: free(v_pqi.value.parent.bitmaps) free(v_pqi.value.parent) NODECOUNT -= 1 free(v_pqi.value.bitmaps) free(v_pqi.value) NODECOUNT -= 1 free(v_pqi) # print 'Maxnodes', MAXNODECOUNT # print 'Nodes:', NODECOUNT free(q.data) free(q) for i in range(rules_length): free(rules[i].bitmaps) free(rules)