rax.c 74 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927
  1. /* Rax -- A radix tree implementation.
  2. *
  3. * Version 1.2 -- 7 February 2019
  4. *
  5. * Copyright (c) 2017-2019, Salvatore Sanfilippo <antirez at gmail dot com>
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions are met:
  10. *
  11. * * Redistributions of source code must retain the above copyright notice,
  12. * this list of conditions and the following disclaimer.
  13. * * Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. * * Neither the name of Redis nor the names of its contributors may be used
  17. * to endorse or promote products derived from this software without
  18. * specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  21. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  24. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30. * POSSIBILITY OF SUCH DAMAGE.
  31. */
  32. #include <stdlib.h>
  33. #include <string.h>
  34. #include <assert.h>
  35. #include <stdio.h>
  36. #include <errno.h>
  37. #include <math.h>
  38. #include "rax.h"
  39. #ifndef RAX_MALLOC_INCLUDE
  40. #define RAX_MALLOC_INCLUDE "rax_malloc.h"
  41. #endif
  42. #include RAX_MALLOC_INCLUDE
  43. /* This is a special pointer that is guaranteed to never have the same value
  44. * of a radix tree node. It's used in order to report "not found" error without
  45. * requiring the function to have multiple return values. */
  46. void *raxNotFound = (void*)"rax-not-found-pointer";
  47. /* -------------------------------- Debugging ------------------------------ */
  48. void raxDebugShowNode(const char *msg, raxNode *n);
  49. /* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on.
  50. * When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot
  51. * of debugging info to the standard output, however you can still turn
  52. * debugging on/off in order to enable it only when you suspect there is an
  53. * operation causing a bug using the function raxSetDebugMsg(). */
  54. #ifdef RAX_DEBUG_MSG
  55. #define debugf(...) \
  56. if (raxDebugMsg) { \
  57. printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
  58. printf(__VA_ARGS__); \
  59. fflush(stdout); \
  60. }
  61. #define debugnode(msg,n) raxDebugShowNode(msg,n)
  62. #else
  63. #define debugf(...)
  64. #define debugnode(msg,n)
  65. #endif
  66. /* By default log debug info if RAX_DEBUG_MSG is defined. */
  67. static int raxDebugMsg = 1;
  68. /* When debug messages are enabled, turn them on/off dynamically. By
  69. * default they are enabled. Set the state to 0 to disable, and 1 to
  70. * re-enable. */
  71. void raxSetDebugMsg(int onoff) {
  72. raxDebugMsg = onoff;
  73. }
  74. /* ------------------------- raxStack functions --------------------------
  75. * The raxStack is a simple stack of pointers that is capable of switching
  76. * from using a stack-allocated array to dynamic heap once a given number of
  77. * items are reached. It is used in order to retain the list of parent nodes
  78. * while walking the radix tree in order to implement certain operations that
  79. * need to navigate the tree upward.
  80. * ------------------------------------------------------------------------- */
  81. /* Initialize the stack. */
  82. static inline void raxStackInit(raxStack *ts) {
  83. ts->stack = ts->static_items;
  84. ts->items = 0;
  85. ts->maxitems = RAX_STACK_STATIC_ITEMS;
  86. ts->oom = 0;
  87. }
  88. /* Push an item into the stack, returns 1 on success, 0 on out of memory. */
  89. static inline int raxStackPush(raxStack *ts, void *ptr) {
  90. if (ts->items == ts->maxitems) {
  91. if (ts->stack == ts->static_items) {
  92. ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2);
  93. if (ts->stack == NULL) {
  94. ts->stack = ts->static_items;
  95. ts->oom = 1;
  96. errno = ENOMEM;
  97. return 0;
  98. }
  99. memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
  100. } else {
  101. void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
  102. if (newalloc == NULL) {
  103. ts->oom = 1;
  104. errno = ENOMEM;
  105. return 0;
  106. }
  107. ts->stack = newalloc;
  108. }
  109. ts->maxitems *= 2;
  110. }
  111. ts->stack[ts->items] = ptr;
  112. ts->items++;
  113. return 1;
  114. }
  115. /* Pop an item from the stack, the function returns NULL if there are no
  116. * items to pop. */
  117. static inline void *raxStackPop(raxStack *ts) {
  118. if (ts->items == 0) return NULL;
  119. ts->items--;
  120. return ts->stack[ts->items];
  121. }
  122. /* Return the stack item at the top of the stack without actually consuming
  123. * it. */
  124. static inline void *raxStackPeek(raxStack *ts) {
  125. if (ts->items == 0) return NULL;
  126. return ts->stack[ts->items-1];
  127. }
  128. /* Free the stack in case we used heap allocation. */
  129. static inline void raxStackFree(raxStack *ts) {
  130. if (ts->stack != ts->static_items) rax_free(ts->stack);
  131. }
  132. /* ----------------------------------------------------------------------------
  133. * Radix tree implementation
  134. * --------------------------------------------------------------------------*/
  135. /* Return the padding needed in the characters section of a node having size
  136. * 'nodesize'. The padding is needed to store the child pointers to aligned
  137. * addresses. Note that we add 4 to the node size because the node has a four
  138. * bytes header. */
  139. #define raxPadding(nodesize) ((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof(void*)-1))
  140. /* Return the pointer to the last child pointer in a node. For the compressed
  141. * nodes this is the only child pointer. */
  142. #define raxNodeLastChildPtr(n) ((raxNode**) ( \
  143. ((char*)(n)) + \
  144. raxNodeCurrentLength(n) - \
  145. sizeof(raxNode*) - \
  146. (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
  147. ))
  148. /* Return the pointer to the first child pointer. */
  149. #define raxNodeFirstChildPtr(n) ((raxNode**) ( \
  150. (n)->data + \
  151. (n)->size + \
  152. raxPadding((n)->size)))
  153. /* Return the current total size of the node. Note that the second line
  154. * computes the padding after the string of characters, needed in order to
  155. * save pointers to aligned addresses. */
  156. #define raxNodeCurrentLength(n) ( \
  157. sizeof(raxNode)+(n)->size+ \
  158. raxPadding((n)->size)+ \
  159. ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
  160. (((n)->iskey && !(n)->isnull)*sizeof(void*)) \
  161. )
  162. /* Allocate a new non compressed node with the specified number of children.
  163. * If datafield is true, the allocation is made large enough to hold the
  164. * associated data pointer.
  165. * Returns the new node pointer. On out of memory NULL is returned. */
  166. raxNode *raxNewNode(size_t children, int datafield) {
  167. size_t nodesize = sizeof(raxNode)+children+raxPadding(children)+
  168. sizeof(raxNode*)*children;
  169. if (datafield) nodesize += sizeof(void*);
  170. raxNode *node = rax_malloc(nodesize);
  171. if (node == NULL) return NULL;
  172. node->iskey = 0;
  173. node->isnull = 0;
  174. node->iscompr = 0;
  175. node->size = children;
  176. return node;
  177. }
  178. /* Allocate a new rax and return its pointer. On out of memory the function
  179. * returns NULL. */
  180. rax *raxNew(void) {
  181. rax *rax = rax_malloc(sizeof(*rax));
  182. if (rax == NULL) return NULL;
  183. rax->numele = 0;
  184. rax->numnodes = 1;
  185. rax->head = raxNewNode(0,0);
  186. if (rax->head == NULL) {
  187. rax_free(rax);
  188. return NULL;
  189. } else {
  190. return rax;
  191. }
  192. }
  193. /* realloc the node to make room for auxiliary data in order
  194. * to store an item in that node. On out of memory NULL is returned. */
  195. raxNode *raxReallocForData(raxNode *n, void *data) {
  196. if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
  197. size_t curlen = raxNodeCurrentLength(n);
  198. return rax_realloc(n,curlen+sizeof(void*));
  199. }
  200. /* Set the node auxiliary data to the specified pointer. */
  201. void raxSetData(raxNode *n, void *data) {
  202. n->iskey = 1;
  203. if (data != NULL) {
  204. n->isnull = 0;
  205. void **ndata = (void**)
  206. ((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
  207. memcpy(ndata,&data,sizeof(data));
  208. } else {
  209. n->isnull = 1;
  210. }
  211. }
  212. /* Get the node auxiliary data. */
  213. void *raxGetData(raxNode *n) {
  214. if (n->isnull) return NULL;
  215. void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
  216. void *data;
  217. memcpy(&data,ndata,sizeof(data));
  218. return data;
  219. }
  220. /* Add a new child to the node 'n' representing the character 'c' and return
  221. * its new pointer, as well as the child pointer by reference. Additionally
  222. * '***parentlink' is populated with the raxNode pointer-to-pointer of where
  223. * the new child was stored, which is useful for the caller to replace the
  224. * child pointer if it gets reallocated.
  225. *
  226. * On success the new parent node pointer is returned (it may change because
  227. * of the realloc, so the caller should discard 'n' and use the new value).
  228. * On out of memory NULL is returned, and the old node is still valid. */
  229. raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {
  230. assert(n->iscompr == 0);
  231. size_t curlen = raxNodeCurrentLength(n);
  232. n->size++;
  233. size_t newlen = raxNodeCurrentLength(n);
  234. n->size--; /* For now restore the original size. We'll update it only on
  235. success at the end. */
  236. /* Alloc the new child we will link to 'n'. */
  237. raxNode *child = raxNewNode(0,0);
  238. if (child == NULL) return NULL;
  239. /* Make space in the original node. */
  240. raxNode *newn = rax_realloc(n,newlen);
  241. if (newn == NULL) {
  242. rax_free(child);
  243. return NULL;
  244. }
  245. n = newn;
  246. /* After the reallocation, we have up to 8/16 (depending on the system
  247. * pointer size, and the required node padding) bytes at the end, that is,
  248. * the additional char in the 'data' section, plus one pointer to the new
  249. * child, plus the padding needed in order to store addresses into aligned
  250. * locations.
  251. *
  252. * So if we start with the following node, having "abde" edges.
  253. *
  254. * Note:
  255. * - We assume 4 bytes pointer for simplicity.
  256. * - Each space below corresponds to one byte
  257. *
  258. * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|
  259. *
  260. * After the reallocation we need: 1 byte for the new edge character
  261. * plus 4 bytes for a new child pointer (assuming 32 bit machine).
  262. * However after adding 1 byte to the edge char, the header + the edge
  263. * characters are no longer aligned, so we also need 3 bytes of padding.
  264. * In total the reallocation will add 1+4+3 bytes = 8 bytes:
  265. *
  266. * (Blank bytes are represented by ".")
  267. *
  268. * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....]
  269. *
  270. * Let's find where to insert the new child in order to make sure
  271. * it is inserted in-place lexicographically. Assuming we are adding
  272. * a child "c" in our case pos will be = 2 after the end of the following
  273. * loop. */
  274. int pos;
  275. for (pos = 0; pos < n->size; pos++) {
  276. if (n->data[pos] > c) break;
  277. }
  278. /* Now, if present, move auxiliary data pointer at the end
  279. * so that we can mess with the other data without overwriting it.
  280. * We will obtain something like that:
  281. *
  282. * [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP|
  283. */
  284. unsigned char *src, *dst;
  285. if (n->iskey && !n->isnull) {
  286. src = ((unsigned char*)n+curlen-sizeof(void*));
  287. dst = ((unsigned char*)n+newlen-sizeof(void*));
  288. memmove(dst,src,sizeof(void*));
  289. }
  290. /* Compute the "shift", that is, how many bytes we need to move the
  291. * pointers section forward because of the addition of the new child
  292. * byte in the string section. Note that if we had no padding, that
  293. * would be always "1", since we are adding a single byte in the string
  294. * section of the node (where now there is "abde" basically).
  295. *
  296. * However we have padding, so it could be zero, or up to 8.
  297. *
  298. * Another way to think at the shift is, how many bytes we need to
  299. * move child pointers forward *other than* the obvious sizeof(void*)
  300. * needed for the additional pointer itself. */
  301. size_t shift = newlen - curlen - sizeof(void*);
  302. /* We said we are adding a node with edge 'c'. The insertion
  303. * point is between 'b' and 'd', so the 'pos' variable value is
  304. * the index of the first child pointer that we need to move forward
  305. * to make space for our new pointer.
  306. *
  307. * To start, move all the child pointers after the insertion point
  308. * of shift+sizeof(pointer) bytes on the right, to obtain:
  309. *
  310. * [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP|
  311. */
  312. src = n->data+n->size+
  313. raxPadding(n->size)+
  314. sizeof(raxNode*)*pos;
  315. memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
  316. /* Move the pointers to the left of the insertion position as well. Often
  317. * we don't need to do anything if there was already some padding to use. In
  318. * that case the final destination of the pointers will be the same, however
  319. * in our example there was no pre-existing padding, so we added one byte
  320. * plus there bytes of padding. After the next memmove() things will look
  321. * like that:
  322. *
  323. * [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
  324. */
  325. if (shift) {
  326. src = (unsigned char*) raxNodeFirstChildPtr(n);
  327. memmove(src+shift,src,sizeof(raxNode*)*pos);
  328. }
  329. /* Now make the space for the additional char in the data section,
  330. * but also move the pointers before the insertion point to the right
  331. * by shift bytes, in order to obtain the following:
  332. *
  333. * [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
  334. */
  335. src = n->data+pos;
  336. memmove(src+1,src,n->size-pos);
  337. /* We can now set the character and its child node pointer to get:
  338. *
  339. * [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
  340. * [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP|
  341. */
  342. n->data[pos] = c;
  343. n->size++;
  344. src = (unsigned char*) raxNodeFirstChildPtr(n);
  345. raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos);
  346. memcpy(childfield,&child,sizeof(child));
  347. *childptr = child;
  348. *parentlink = childfield;
  349. return n;
  350. }
  351. /* Turn the node 'n', that must be a node without any children, into a
  352. * compressed node representing a set of nodes linked one after the other
  353. * and having exactly one child each. The node can be a key or not: this
  354. * property and the associated value if any will be preserved.
  355. *
  356. * The function also returns a child node, since the last node of the
  357. * compressed chain cannot be part of the chain: it has zero children while
  358. * we can only compress inner nodes with exactly one child each. */
  359. raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
  360. assert(n->size == 0 && n->iscompr == 0);
  361. void *data = NULL; /* Initialized only to avoid warnings. */
  362. size_t newsize;
  363. debugf("Compress node: %.*s\n", (int)len,s);
  364. /* Allocate the child to link to this node. */
  365. *child = raxNewNode(0,0);
  366. if (*child == NULL) return NULL;
  367. /* Make space in the parent node. */
  368. newsize = sizeof(raxNode)+len+raxPadding(len)+sizeof(raxNode*);
  369. if (n->iskey) {
  370. data = raxGetData(n); /* To restore it later. */
  371. if (!n->isnull) newsize += sizeof(void*);
  372. }
  373. raxNode *newn = rax_realloc(n,newsize);
  374. if (newn == NULL) {
  375. rax_free(*child);
  376. return NULL;
  377. }
  378. n = newn;
  379. n->iscompr = 1;
  380. n->size = len;
  381. memcpy(n->data,s,len);
  382. if (n->iskey) raxSetData(n,data);
  383. raxNode **childfield = raxNodeLastChildPtr(n);
  384. memcpy(childfield,child,sizeof(*child));
  385. return n;
  386. }
  387. /* Low level function that walks the tree looking for the string
  388. * 's' of 'len' bytes. The function returns the number of characters
  389. * of the key that was possible to process: if the returned integer
  390. * is the same as 'len', then it means that the node corresponding to the
  391. * string was found (however it may not be a key in case the node->iskey is
  392. * zero or if simply we stopped in the middle of a compressed node, so that
  393. * 'splitpos' is non zero).
  394. *
  395. * Otherwise if the returned integer is not the same as 'len', there was an
  396. * early stop during the tree walk because of a character mismatch.
  397. *
  398. * The node where the search ended (because the full string was processed
  399. * or because there was an early stop) is returned by reference as
  400. * '*stopnode' if the passed pointer is not NULL. This node link in the
  401. * parent's node is returned as '*plink' if not NULL. Finally, if the
  402. * search stopped in a compressed node, '*splitpos' returns the index
  403. * inside the compressed node where the search ended. This is useful to
  404. * know where to split the node for insertion.
  405. *
  406. * Note that when we stop in the middle of a compressed node with
  407. * a perfect match, this function will return a length equal to the
  408. * 'len' argument (all the key matched), and will return a *splitpos which is
  409. * always positive (that will represent the index of the character immediately
  410. * *after* the last match in the current compressed node).
  411. *
  412. * When instead we stop at a compressed node and *splitpos is zero, it
  413. * means that the current node represents the key (that is, none of the
  414. * compressed node characters are needed to represent the key, just all
  415. * its parents nodes). */
  416. static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
  417. raxNode *h = rax->head;
  418. raxNode **parentlink = &rax->head;
  419. size_t i = 0; /* Position in the string. */
  420. size_t j = 0; /* Position in the node children (or bytes if compressed).*/
  421. while(h->size && i < len) {
  422. debugnode("Lookup current node",h);
  423. unsigned char *v = h->data;
  424. if (h->iscompr) {
  425. for (j = 0; j < h->size && i < len; j++, i++) {
  426. if (v[j] != s[i]) break;
  427. }
  428. if (j != h->size) break;
  429. } else {
  430. /* Even when h->size is large, linear scan provides good
  431. * performances compared to other approaches that are in theory
  432. * more sounding, like performing a binary search. */
  433. for (j = 0; j < h->size; j++) {
  434. if (v[j] == s[i]) break;
  435. }
  436. if (j == h->size) break;
  437. i++;
  438. }
  439. if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */
  440. raxNode **children = raxNodeFirstChildPtr(h);
  441. if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
  442. memcpy(&h,children+j,sizeof(h));
  443. parentlink = children+j;
  444. j = 0; /* If the new node is non compressed and we do not
  445. iterate again (since i == len) set the split
  446. position to 0 to signal this node represents
  447. the searched key. */
  448. }
  449. debugnode("Lookup stop node is",h);
  450. if (stopnode) *stopnode = h;
  451. if (plink) *plink = parentlink;
  452. if (splitpos && h->iscompr) *splitpos = j;
  453. return i;
  454. }
  455. /* Insert the element 's' of size 'len', setting as auxiliary data
  456. * the pointer 'data'. If the element is already present, the associated
  457. * data is updated (only if 'overwrite' is set to 1), and 0 is returned,
  458. * otherwise the element is inserted and 1 is returned. On out of memory the
  459. * function returns 0 as well but sets errno to ENOMEM, otherwise errno will
  460. * be set to 0.
  461. */
  462. int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
  463. size_t i;
  464. int j = 0; /* Split position. If raxLowWalk() stops in a compressed
  465. node, the index 'j' represents the char we stopped within the
  466. compressed node, that is, the position where to split the
  467. node for insertion. */
  468. raxNode *h, **parentlink;
  469. debugf("### Insert %.*s with value %p\n", (int)len, s, data);
  470. i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
  471. /* If i == len we walked following the whole string. If we are not
  472. * in the middle of a compressed node, the string is either already
  473. * inserted or this middle node is currently not a key, but can represent
  474. * our key. We have just to reallocate the node and make space for the
  475. * data pointer. */
  476. if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
  477. debugf("### Insert: node representing key exists\n");
  478. /* Make space for the value pointer if needed. */
  479. if (!h->iskey || (h->isnull && overwrite)) {
  480. h = raxReallocForData(h,data);
  481. if (h) memcpy(parentlink,&h,sizeof(h));
  482. }
  483. if (h == NULL) {
  484. errno = ENOMEM;
  485. return 0;
  486. }
  487. /* Update the existing key if there is already one. */
  488. if (h->iskey) {
  489. if (old) *old = raxGetData(h);
  490. if (overwrite) raxSetData(h,data);
  491. errno = 0;
  492. return 0; /* Element already exists. */
  493. }
  494. /* Otherwise set the node as a key. Note that raxSetData()
  495. * will set h->iskey. */
  496. raxSetData(h,data);
  497. rax->numele++;
  498. return 1; /* Element inserted. */
  499. }
  500. /* If the node we stopped at is a compressed node, we need to
  501. * split it before to continue.
  502. *
  503. * Splitting a compressed node have a few possible cases.
  504. * Imagine that the node 'h' we are currently at is a compressed
  505. * node containing the string "ANNIBALE" (it means that it represents
  506. * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
  507. * pointer of this node pointing at the 'E' node, because remember that
  508. * we have characters at the edges of the graph, not inside the nodes
  509. * themselves.
  510. *
  511. * In order to show a real case imagine our node to also point to
  512. * another compressed node, that finally points at the node without
  513. * children, representing 'O':
  514. *
  515. * "ANNIBALE" -> "SCO" -> []
  516. *
  517. * When inserting we may face the following cases. Note that all the cases
  518. * require the insertion of a non compressed node with exactly two
  519. * children, except for the last case which just requires splitting a
  520. * compressed node.
  521. *
  522. * 1) Inserting "ANNIENTARE"
  523. *
  524. * |B| -> "ALE" -> "SCO" -> []
  525. * "ANNI" -> |-|
  526. * |E| -> (... continue algo ...) "NTARE" -> []
  527. *
  528. * 2) Inserting "ANNIBALI"
  529. *
  530. * |E| -> "SCO" -> []
  531. * "ANNIBAL" -> |-|
  532. * |I| -> (... continue algo ...) []
  533. *
  534. * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
  535. *
  536. * |N| -> "NIBALE" -> "SCO" -> []
  537. * |A| -> |-|
  538. * |G| -> (... continue algo ...) |O| -> []
  539. *
  540. * 4) Inserting "CIAO"
  541. *
  542. * |A| -> "NNIBALE" -> "SCO" -> []
  543. * |-|
  544. * |C| -> (... continue algo ...) "IAO" -> []
  545. *
  546. * 5) Inserting "ANNI"
  547. *
  548. * "ANNI" -> "BALE" -> "SCO" -> []
  549. *
  550. * The final algorithm for insertion covering all the above cases is as
  551. * follows.
  552. *
  553. * ============================= ALGO 1 =============================
  554. *
  555. * For the above cases 1 to 4, that is, all cases where we stopped in
  556. * the middle of a compressed node for a character mismatch, do:
  557. *
  558. * Let $SPLITPOS be the zero-based index at which, in the
  559. * compressed node array of characters, we found the mismatching
  560. * character. For example if the node contains "ANNIBALE" and we add
  561. * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
  562. * mismatching character is found.
  563. *
  564. * 1. Save the current compressed node $NEXT pointer (the pointer to the
  565. * child element, that is always present in compressed nodes).
  566. *
  567. * 2. Create "split node" having as child the non common letter
  568. * at the compressed node. The other non common letter (at the key)
  569. * will be added later as we continue the normal insertion algorithm
  570. * at step "6".
  571. *
  572. * 3a. IF $SPLITPOS == 0:
  573. * Replace the old node with the split node, by copying the auxiliary
  574. * data if any. Fix parent's reference. Free old node eventually
  575. * (we still need its data for the next steps of the algorithm).
  576. *
  577. * 3b. IF $SPLITPOS != 0:
  578. * Trim the compressed node (reallocating it as well) in order to
  579. * contain $splitpos characters. Change child pointer in order to link
  580. * to the split node. If new compressed node len is just 1, set
  581. * iscompr to 0 (layout is the same). Fix parent's reference.
  582. *
  583. * 4a. IF the postfix len (the length of the remaining string of the
  584. * original compressed node after the split character) is non zero,
  585. * create a "postfix node". If the postfix node has just one character
  586. * set iscompr to 0, otherwise iscompr to 1. Set the postfix node
  587. * child pointer to $NEXT.
  588. *
  589. * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
  590. *
  591. * 5. Set child[0] of split node to postfix node.
  592. *
  593. * 6. Set the split node as the current node, set current index at child[1]
  594. * and continue insertion algorithm as usually.
  595. *
  596. * ============================= ALGO 2 =============================
  597. *
  598. * For case 5, that is, if we stopped in the middle of a compressed
  599. * node but no mismatch was found, do:
  600. *
  601. * Let $SPLITPOS be the zero-based index at which, in the
  602. * compressed node array of characters, we stopped iterating because
  603. * there were no more keys character to match. So in the example of
  604. * the node "ANNIBALE", adding the string "ANNI", the $SPLITPOS is 4.
  605. *
  606. * 1. Save the current compressed node $NEXT pointer (the pointer to the
  607. * child element, that is always present in compressed nodes).
  608. *
  609. * 2. Create a "postfix node" containing all the characters from $SPLITPOS
  610. * to the end. Use $NEXT as the postfix node child pointer.
  611. * If the postfix node length is 1, set iscompr to 0.
  612. * Set the node as a key with the associated value of the new
  613. * inserted key.
  614. *
  615. * 3. Trim the current node to contain the first $SPLITPOS characters.
  616. * As usually if the new node length is just 1, set iscompr to 0.
  617. * Take the iskey / associated value as it was in the original node.
  618. * Fix the parent's reference.
  619. *
  620. * 4. Set the postfix node as the only child pointer of the trimmed
  621. * node created at step 1.
  622. */
  623. /* ------------------------- ALGORITHM 1 --------------------------- */
  624. if (h->iscompr && i != len) {
  625. debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
  626. h->size, h->data, (void*)h);
  627. debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
  628. debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
  629. debugf("Other (key) letter is '%c'\n", s[i]);
  630. /* 1: Save next pointer. */
  631. raxNode **childfield = raxNodeLastChildPtr(h);
  632. raxNode *next;
  633. memcpy(&next,childfield,sizeof(next));
  634. debugf("Next is %p\n", (void*)next);
  635. debugf("iskey %d\n", h->iskey);
  636. if (h->iskey) {
  637. debugf("key value is %p\n", raxGetData(h));
  638. }
  639. /* Set the length of the additional nodes we will need. */
  640. size_t trimmedlen = j;
  641. size_t postfixlen = h->size - j - 1;
  642. int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
  643. size_t nodesize;
  644. /* 2: Create the split node. Also allocate the other nodes we'll need
  645. * ASAP, so that it will be simpler to handle OOM. */
  646. raxNode *splitnode = raxNewNode(1, split_node_is_key);
  647. raxNode *trimmed = NULL;
  648. raxNode *postfix = NULL;
  649. if (trimmedlen) {
  650. nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+
  651. sizeof(raxNode*);
  652. if (h->iskey && !h->isnull) nodesize += sizeof(void*);
  653. trimmed = rax_malloc(nodesize);
  654. }
  655. if (postfixlen) {
  656. nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
  657. sizeof(raxNode*);
  658. postfix = rax_malloc(nodesize);
  659. }
  660. /* OOM? Abort now that the tree is untouched. */
  661. if (splitnode == NULL ||
  662. (trimmedlen && trimmed == NULL) ||
  663. (postfixlen && postfix == NULL))
  664. {
  665. rax_free(splitnode);
  666. rax_free(trimmed);
  667. rax_free(postfix);
  668. errno = ENOMEM;
  669. return 0;
  670. }
  671. splitnode->data[0] = h->data[j];
  672. if (j == 0) {
  673. /* 3a: Replace the old node with the split node. */
  674. if (h->iskey) {
  675. void *ndata = raxGetData(h);
  676. raxSetData(splitnode,ndata);
  677. }
  678. memcpy(parentlink,&splitnode,sizeof(splitnode));
  679. } else {
  680. /* 3b: Trim the compressed node. */
  681. trimmed->size = j;
  682. memcpy(trimmed->data,h->data,j);
  683. trimmed->iscompr = j > 1 ? 1 : 0;
  684. trimmed->iskey = h->iskey;
  685. trimmed->isnull = h->isnull;
  686. if (h->iskey && !h->isnull) {
  687. void *ndata = raxGetData(h);
  688. raxSetData(trimmed,ndata);
  689. }
  690. raxNode **cp = raxNodeLastChildPtr(trimmed);
  691. memcpy(cp,&splitnode,sizeof(splitnode));
  692. memcpy(parentlink,&trimmed,sizeof(trimmed));
  693. parentlink = cp; /* Set parentlink to splitnode parent. */
  694. rax->numnodes++;
  695. }
  696. /* 4: Create the postfix node: what remains of the original
  697. * compressed node after the split. */
  698. if (postfixlen) {
  699. /* 4a: create a postfix node. */
  700. postfix->iskey = 0;
  701. postfix->isnull = 0;
  702. postfix->size = postfixlen;
  703. postfix->iscompr = postfixlen > 1;
  704. memcpy(postfix->data,h->data+j+1,postfixlen);
  705. raxNode **cp = raxNodeLastChildPtr(postfix);
  706. memcpy(cp,&next,sizeof(next));
  707. rax->numnodes++;
  708. } else {
  709. /* 4b: just use next as postfix node. */
  710. postfix = next;
  711. }
  712. /* 5: Set splitnode first child as the postfix node. */
  713. raxNode **splitchild = raxNodeLastChildPtr(splitnode);
  714. memcpy(splitchild,&postfix,sizeof(postfix));
  715. /* 6. Continue insertion: this will cause the splitnode to
  716. * get a new child (the non common character at the currently
  717. * inserted key). */
  718. rax_free(h);
  719. h = splitnode;
  720. } else if (h->iscompr && i == len) {
  721. /* ------------------------- ALGORITHM 2 --------------------------- */
  722. debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
  723. h->size, h->data, (void*)h, j);
  724. /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */
  725. size_t postfixlen = h->size - j;
  726. size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
  727. sizeof(raxNode*);
  728. if (data != NULL) nodesize += sizeof(void*);
  729. raxNode *postfix = rax_malloc(nodesize);
  730. nodesize = sizeof(raxNode)+j+raxPadding(j)+sizeof(raxNode*);
  731. if (h->iskey && !h->isnull) nodesize += sizeof(void*);
  732. raxNode *trimmed = rax_malloc(nodesize);
  733. if (postfix == NULL || trimmed == NULL) {
  734. rax_free(postfix);
  735. rax_free(trimmed);
  736. errno = ENOMEM;
  737. return 0;
  738. }
  739. /* 1: Save next pointer. */
  740. raxNode **childfield = raxNodeLastChildPtr(h);
  741. raxNode *next;
  742. memcpy(&next,childfield,sizeof(next));
  743. /* 2: Create the postfix node. */
  744. postfix->size = postfixlen;
  745. postfix->iscompr = postfixlen > 1;
  746. postfix->iskey = 1;
  747. postfix->isnull = 0;
  748. memcpy(postfix->data,h->data+j,postfixlen);
  749. raxSetData(postfix,data);
  750. raxNode **cp = raxNodeLastChildPtr(postfix);
  751. memcpy(cp,&next,sizeof(next));
  752. rax->numnodes++;
  753. /* 3: Trim the compressed node. */
  754. trimmed->size = j;
  755. trimmed->iscompr = j > 1;
  756. trimmed->iskey = 0;
  757. trimmed->isnull = 0;
  758. memcpy(trimmed->data,h->data,j);
  759. memcpy(parentlink,&trimmed,sizeof(trimmed));
  760. if (h->iskey) {
  761. void *aux = raxGetData(h);
  762. raxSetData(trimmed,aux);
  763. }
  764. /* Fix the trimmed node child pointer to point to
  765. * the postfix node. */
  766. cp = raxNodeLastChildPtr(trimmed);
  767. memcpy(cp,&postfix,sizeof(postfix));
  768. /* Finish! We don't need to continue with the insertion
  769. * algorithm for ALGO 2. The key is already inserted. */
  770. rax->numele++;
  771. rax_free(h);
  772. return 1; /* Key inserted. */
  773. }
  774. /* We walked the radix tree as far as we could, but still there are left
  775. * chars in our string. We need to insert the missing nodes. */
  776. while(i < len) {
  777. raxNode *child;
  778. /* If this node is going to have a single child, and there
  779. * are other characters, so that that would result in a chain
  780. * of single-childed nodes, turn it into a compressed node. */
  781. if (h->size == 0 && len-i > 1) {
  782. debugf("Inserting compressed node\n");
  783. size_t comprsize = len-i;
  784. if (comprsize > RAX_NODE_MAX_SIZE)
  785. comprsize = RAX_NODE_MAX_SIZE;
  786. raxNode *newh = raxCompressNode(h,s+i,comprsize,&child);
  787. if (newh == NULL) goto oom;
  788. h = newh;
  789. memcpy(parentlink,&h,sizeof(h));
  790. parentlink = raxNodeLastChildPtr(h);
  791. i += comprsize;
  792. } else {
  793. debugf("Inserting normal node\n");
  794. raxNode **new_parentlink;
  795. raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
  796. if (newh == NULL) goto oom;
  797. h = newh;
  798. memcpy(parentlink,&h,sizeof(h));
  799. parentlink = new_parentlink;
  800. i++;
  801. }
  802. rax->numnodes++;
  803. h = child;
  804. }
  805. raxNode *newh = raxReallocForData(h,data);
  806. if (newh == NULL) goto oom;
  807. h = newh;
  808. if (!h->iskey) rax->numele++;
  809. raxSetData(h,data);
  810. memcpy(parentlink,&h,sizeof(h));
  811. return 1; /* Element inserted. */
  812. oom:
  813. /* This code path handles out of memory after part of the sub-tree was
  814. * already modified. Set the node as a key, and then remove it. However we
  815. * do that only if the node is a terminal node, otherwise if the OOM
  816. * happened reallocating a node in the middle, we don't need to free
  817. * anything. */
  818. if (h->size == 0) {
  819. h->isnull = 1;
  820. h->iskey = 1;
  821. rax->numele++; /* Compensate the next remove. */
  822. assert(raxRemove(rax,s,i,NULL) != 0);
  823. }
  824. errno = ENOMEM;
  825. return 0;
  826. }
  827. /* Overwriting insert. Just a wrapper for raxGenericInsert() that will
  828. * update the element if there is already one for the same key. */
  829. int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
  830. return raxGenericInsert(rax,s,len,data,old,1);
  831. }
  832. /* Non overwriting insert function: this if an element with the same key
  833. * exists, the value is not updated and the function returns 0.
  834. * This is a just a wrapper for raxGenericInsert(). */
  835. int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
  836. return raxGenericInsert(rax,s,len,data,old,0);
  837. }
  838. /* Find a key in the rax, returns raxNotFound special void pointer value
  839. * if the item was not found, otherwise the value associated with the
  840. * item is returned. */
  841. void *raxFind(rax *rax, unsigned char *s, size_t len) {
  842. raxNode *h;
  843. debugf("### Lookup: %.*s\n", (int)len, s);
  844. int splitpos = 0;
  845. size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL);
  846. if (i != len || (h->iscompr && splitpos != 0) || !h->iskey)
  847. return raxNotFound;
  848. return raxGetData(h);
  849. }
  850. /* Return the memory address where the 'parent' node stores the specified
  851. * 'child' pointer, so that the caller can update the pointer with another
  852. * one if needed. The function assumes it will find a match, otherwise the
  853. * operation is an undefined behavior (it will continue scanning the
  854. * memory without any bound checking). */
  855. raxNode **raxFindParentLink(raxNode *parent, raxNode *child) {
  856. raxNode **cp = raxNodeFirstChildPtr(parent);
  857. raxNode *c;
  858. while(1) {
  859. memcpy(&c,cp,sizeof(c));
  860. if (c == child) break;
  861. cp++;
  862. }
  863. return cp;
  864. }
  865. /* Low level child removal from node. The new node pointer (after the child
  866. * removal) is returned. Note that this function does not fix the pointer
  867. * of the parent node in its parent, so this task is up to the caller.
  868. * The function never fails for out of memory. */
  869. raxNode *raxRemoveChild(raxNode *parent, raxNode *child) {
  870. debugnode("raxRemoveChild before", parent);
  871. /* If parent is a compressed node (having a single child, as for definition
  872. * of the data structure), the removal of the child consists into turning
  873. * it into a normal node without children. */
  874. if (parent->iscompr) {
  875. void *data = NULL;
  876. if (parent->iskey) data = raxGetData(parent);
  877. parent->isnull = 0;
  878. parent->iscompr = 0;
  879. parent->size = 0;
  880. if (parent->iskey) raxSetData(parent,data);
  881. debugnode("raxRemoveChild after", parent);
  882. return parent;
  883. }
  884. /* Otherwise we need to scan for the child pointer and memmove()
  885. * accordingly.
  886. *
  887. * 1. To start we seek the first element in both the children
  888. * pointers and edge bytes in the node. */
  889. raxNode **cp = raxNodeFirstChildPtr(parent);
  890. raxNode **c = cp;
  891. unsigned char *e = parent->data;
  892. /* 2. Search the child pointer to remove inside the array of children
  893. * pointers. */
  894. while(1) {
  895. raxNode *aux;
  896. memcpy(&aux,c,sizeof(aux));
  897. if (aux == child) break;
  898. c++;
  899. e++;
  900. }
  901. /* 3. Remove the edge and the pointer by memmoving the remaining children
  902. * pointer and edge bytes one position before. */
  903. int taillen = parent->size - (e - parent->data) - 1;
  904. debugf("raxRemoveChild tail len: %d\n", taillen);
  905. memmove(e,e+1,taillen);
  906. /* Compute the shift, that is the amount of bytes we should move our
  907. * child pointers to the left, since the removal of one edge character
  908. * and the corresponding padding change, may change the layout.
  909. * We just check if in the old version of the node there was at the
  910. * end just a single byte and all padding: in that case removing one char
  911. * will remove a whole sizeof(void*) word. */
  912. size_t shift = ((parent->size+4) % sizeof(void*)) == 1 ? sizeof(void*) : 0;
  913. /* Move the children pointers before the deletion point. */
  914. if (shift)
  915. memmove(((char*)cp)-shift,cp,(parent->size-taillen-1)*sizeof(raxNode**));
  916. /* Move the remaining "tail" pointers at the right position as well. */
  917. size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0;
  918. memmove(((char*)c)-shift,c+1,taillen*sizeof(raxNode**)+valuelen);
  919. /* 4. Update size. */
  920. parent->size--;
  921. /* realloc the node according to the theoretical memory usage, to free
  922. * data if we are over-allocating right now. */
  923. raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent));
  924. if (newnode) {
  925. debugnode("raxRemoveChild after", newnode);
  926. }
  927. /* Note: if rax_realloc() fails we just return the old address, which
  928. * is valid. */
  929. return newnode ? newnode : parent;
  930. }
  931. /* Remove the specified item. Returns 1 if the item was found and
  932. * deleted, 0 otherwise. */
  933. int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
  934. raxNode *h;
  935. raxStack ts;
  936. debugf("### Delete: %.*s\n", (int)len, s);
  937. raxStackInit(&ts);
  938. int splitpos = 0;
  939. size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts);
  940. if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {
  941. raxStackFree(&ts);
  942. return 0;
  943. }
  944. if (old) *old = raxGetData(h);
  945. h->iskey = 0;
  946. rax->numele--;
  947. /* If this node has no children, the deletion needs to reclaim the
  948. * no longer used nodes. This is an iterative process that needs to
  949. * walk the three upward, deleting all the nodes with just one child
  950. * that are not keys, until the head of the rax is reached or the first
  951. * node with more than one child is found. */
  952. int trycompress = 0; /* Will be set to 1 if we should try to optimize the
  953. tree resulting from the deletion. */
  954. if (h->size == 0) {
  955. debugf("Key deleted in node without children. Cleanup needed.\n");
  956. raxNode *child = NULL;
  957. while(h != rax->head) {
  958. child = h;
  959. debugf("Freeing child %p [%.*s] key:%d\n", (void*)child,
  960. (int)child->size, (char*)child->data, child->iskey);
  961. rax_free(child);
  962. rax->numnodes--;
  963. h = raxStackPop(&ts);
  964. /* If this node has more then one child, or actually holds
  965. * a key, stop here. */
  966. if (h->iskey || (!h->iscompr && h->size != 1)) break;
  967. }
  968. if (child) {
  969. debugf("Unlinking child %p from parent %p\n",
  970. (void*)child, (void*)h);
  971. raxNode *new = raxRemoveChild(h,child);
  972. if (new != h) {
  973. raxNode *parent = raxStackPeek(&ts);
  974. raxNode **parentlink;
  975. if (parent == NULL) {
  976. parentlink = &rax->head;
  977. } else {
  978. parentlink = raxFindParentLink(parent,h);
  979. }
  980. memcpy(parentlink,&new,sizeof(new));
  981. }
  982. /* If after the removal the node has just a single child
  983. * and is not a key, we need to try to compress it. */
  984. if (new->size == 1 && new->iskey == 0) {
  985. trycompress = 1;
  986. h = new;
  987. }
  988. }
  989. } else if (h->size == 1) {
  990. /* If the node had just one child, after the removal of the key
  991. * further compression with adjacent nodes is potentially possible. */
  992. trycompress = 1;
  993. }
  994. /* Don't try node compression if our nodes pointers stack is not
  995. * complete because of OOM while executing raxLowWalk() */
  996. if (trycompress && ts.oom) trycompress = 0;
  997. /* Recompression: if trycompress is true, 'h' points to a radix tree node
  998. * that changed in a way that could allow to compress nodes in this
  999. * sub-branch. Compressed nodes represent chains of nodes that are not
  1000. * keys and have a single child, so there are two deletion events that
  1001. * may alter the tree so that further compression is needed:
  1002. *
  1003. * 1) A node with a single child was a key and now no longer is a key.
  1004. * 2) A node with two children now has just one child.
  1005. *
  1006. * We try to navigate upward till there are other nodes that can be
  1007. * compressed, when we reach the upper node which is not a key and has
  1008. * a single child, we scan the chain of children to collect the
  1009. * compressible part of the tree, and replace the current node with the
  1010. * new one, fixing the child pointer to reference the first non
  1011. * compressible node.
  1012. *
  1013. * Example of case "1". A tree stores the keys "FOO" = 1 and
  1014. * "FOOBAR" = 2:
  1015. *
  1016. *
  1017. * "FOO" -> "BAR" -> [] (2)
  1018. * (1)
  1019. *
  1020. * After the removal of "FOO" the tree can be compressed as:
  1021. *
  1022. * "FOOBAR" -> [] (2)
  1023. *
  1024. *
  1025. * Example of case "2". A tree stores the keys "FOOBAR" = 1 and
  1026. * "FOOTER" = 2:
  1027. *
  1028. * |B| -> "AR" -> [] (1)
  1029. * "FOO" -> |-|
  1030. * |T| -> "ER" -> [] (2)
  1031. *
  1032. * After the removal of "FOOTER" the resulting tree is:
  1033. *
  1034. * "FOO" -> |B| -> "AR" -> [] (1)
  1035. *
  1036. * That can be compressed into:
  1037. *
  1038. * "FOOBAR" -> [] (1)
  1039. */
  1040. if (trycompress) {
  1041. debugf("After removing %.*s:\n", (int)len, s);
  1042. debugnode("Compression may be needed",h);
  1043. debugf("Seek start node\n");
  1044. /* Try to reach the upper node that is compressible.
  1045. * At the end of the loop 'h' will point to the first node we
  1046. * can try to compress and 'parent' to its parent. */
  1047. raxNode *parent;
  1048. while(1) {
  1049. parent = raxStackPop(&ts);
  1050. if (!parent || parent->iskey ||
  1051. (!parent->iscompr && parent->size != 1)) break;
  1052. h = parent;
  1053. debugnode("Going up to",h);
  1054. }
  1055. raxNode *start = h; /* Compression starting node. */
  1056. /* Scan chain of nodes we can compress. */
  1057. size_t comprsize = h->size;
  1058. int nodes = 1;
  1059. while(h->size != 0) {
  1060. raxNode **cp = raxNodeLastChildPtr(h);
  1061. memcpy(&h,cp,sizeof(h));
  1062. if (h->iskey || (!h->iscompr && h->size != 1)) break;
  1063. /* Stop here if going to the next node would result into
  1064. * a compressed node larger than h->size can hold. */
  1065. if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;
  1066. nodes++;
  1067. comprsize += h->size;
  1068. }
  1069. if (nodes > 1) {
  1070. /* If we can compress, create the new node and populate it. */
  1071. size_t nodesize =
  1072. sizeof(raxNode)+comprsize+raxPadding(comprsize)+sizeof(raxNode*);
  1073. raxNode *new = rax_malloc(nodesize);
  1074. /* An out of memory here just means we cannot optimize this
  1075. * node, but the tree is left in a consistent state. */
  1076. if (new == NULL) {
  1077. raxStackFree(&ts);
  1078. return 1;
  1079. }
  1080. new->iskey = 0;
  1081. new->isnull = 0;
  1082. new->iscompr = 1;
  1083. new->size = comprsize;
  1084. rax->numnodes++;
  1085. /* Scan again, this time to populate the new node content and
  1086. * to fix the new node child pointer. At the same time we free
  1087. * all the nodes that we'll no longer use. */
  1088. comprsize = 0;
  1089. h = start;
  1090. while(h->size != 0) {
  1091. memcpy(new->data+comprsize,h->data,h->size);
  1092. comprsize += h->size;
  1093. raxNode **cp = raxNodeLastChildPtr(h);
  1094. raxNode *tofree = h;
  1095. memcpy(&h,cp,sizeof(h));
  1096. rax_free(tofree); rax->numnodes--;
  1097. if (h->iskey || (!h->iscompr && h->size != 1)) break;
  1098. }
  1099. debugnode("New node",new);
  1100. /* Now 'h' points to the first node that we still need to use,
  1101. * so our new node child pointer will point to it. */
  1102. raxNode **cp = raxNodeLastChildPtr(new);
  1103. memcpy(cp,&h,sizeof(h));
  1104. /* Fix parent link. */
  1105. if (parent) {
  1106. raxNode **parentlink = raxFindParentLink(parent,start);
  1107. memcpy(parentlink,&new,sizeof(new));
  1108. } else {
  1109. rax->head = new;
  1110. }
  1111. debugf("Compressed %d nodes, %d total bytes\n",
  1112. nodes, (int)comprsize);
  1113. }
  1114. }
  1115. raxStackFree(&ts);
  1116. return 1;
  1117. }
  1118. /* This is the core of raxFree(): performs a depth-first scan of the
  1119. * tree and releases all the nodes found. */
  1120. void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) {
  1121. debugnode("free traversing",n);
  1122. int numchildren = n->iscompr ? 1 : n->size;
  1123. raxNode **cp = raxNodeLastChildPtr(n);
  1124. while(numchildren--) {
  1125. raxNode *child;
  1126. memcpy(&child,cp,sizeof(child));
  1127. raxRecursiveFree(rax,child,free_callback);
  1128. cp--;
  1129. }
  1130. debugnode("free depth-first",n);
  1131. if (free_callback && n->iskey && !n->isnull)
  1132. free_callback(raxGetData(n));
  1133. rax_free(n);
  1134. rax->numnodes--;
  1135. }
  1136. /* Free a whole radix tree, calling the specified callback in order to
  1137. * free the auxiliary data. */
  1138. void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) {
  1139. raxRecursiveFree(rax,rax->head,free_callback);
  1140. assert(rax->numnodes == 0);
  1141. rax_free(rax);
  1142. }
  1143. /* Free a whole radix tree. */
  1144. void raxFree(rax *rax) {
  1145. raxFreeWithCallback(rax,NULL);
  1146. }
  1147. /* ------------------------------- Iterator --------------------------------- */
  1148. /* Initialize a Rax iterator. This call should be performed a single time
  1149. * to initialize the iterator, and must be followed by a raxSeek() call,
  1150. * otherwise the raxPrev()/raxNext() functions will just return EOF. */
  1151. void raxStart(raxIterator *it, rax *rt) {
  1152. it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */
  1153. it->rt = rt;
  1154. it->key_len = 0;
  1155. it->key = it->key_static_string;
  1156. it->key_max = RAX_ITER_STATIC_LEN;
  1157. it->data = NULL;
  1158. it->node_cb = NULL;
  1159. raxStackInit(&it->stack);
  1160. }
  1161. /* Append characters at the current key string of the iterator 'it'. This
  1162. * is a low level function used to implement the iterator, not callable by
  1163. * the user. Returns 0 on out of memory, otherwise 1 is returned. */
  1164. int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) {
  1165. if (len == 0) return 1;
  1166. if (it->key_max < it->key_len+len) {
  1167. unsigned char *old = (it->key == it->key_static_string) ? NULL :
  1168. it->key;
  1169. size_t new_max = (it->key_len+len)*2;
  1170. it->key = rax_realloc(old,new_max);
  1171. if (it->key == NULL) {
  1172. it->key = (!old) ? it->key_static_string : old;
  1173. errno = ENOMEM;
  1174. return 0;
  1175. }
  1176. if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len);
  1177. it->key_max = new_max;
  1178. }
  1179. /* Use memmove since there could be an overlap between 's' and
  1180. * it->key when we use the current key in order to re-seek. */
  1181. memmove(it->key+it->key_len,s,len);
  1182. it->key_len += len;
  1183. return 1;
  1184. }
  1185. /* Remove the specified number of chars from the right of the current
  1186. * iterator key. */
  1187. void raxIteratorDelChars(raxIterator *it, size_t count) {
  1188. it->key_len -= count;
  1189. }
  1190. /* Do an iteration step towards the next element. At the end of the step the
  1191. * iterator key will represent the (new) current key. If it is not possible
  1192. * to step in the specified direction since there are no longer elements, the
  1193. * iterator is flagged with RAX_ITER_EOF.
  1194. *
  1195. * If 'noup' is true the function starts directly scanning for the next
  1196. * lexicographically smaller children, and the current node is already assumed
  1197. * to be the parent of the last key node, so the first operation to go back to
  1198. * the parent will be skipped. This option is used by raxSeek() when
  1199. * implementing seeking a non existing element with the ">" or "<" options:
  1200. * the starting node is not a key in that particular case, so we start the scan
  1201. * from a node that does not represent the key set.
  1202. *
  1203. * The function returns 1 on success or 0 on out of memory. */
  1204. int raxIteratorNextStep(raxIterator *it, int noup) {
  1205. if (it->flags & RAX_ITER_EOF) {
  1206. return 1;
  1207. } else if (it->flags & RAX_ITER_JUST_SEEKED) {
  1208. it->flags &= ~RAX_ITER_JUST_SEEKED;
  1209. return 1;
  1210. }
  1211. /* Save key len, stack items and the node where we are currently
  1212. * so that on iterator EOF we can restore the current key and state. */
  1213. size_t orig_key_len = it->key_len;
  1214. size_t orig_stack_items = it->stack.items;
  1215. raxNode *orig_node = it->node;
  1216. while(1) {
  1217. int children = it->node->iscompr ? 1 : it->node->size;
  1218. if (!noup && children) {
  1219. debugf("GO DEEPER\n");
  1220. /* Seek the lexicographically smaller key in this subtree, which
  1221. * is the first one found always going towards the first child
  1222. * of every successive node. */
  1223. if (!raxStackPush(&it->stack,it->node)) return 0;
  1224. raxNode **cp = raxNodeFirstChildPtr(it->node);
  1225. if (!raxIteratorAddChars(it,it->node->data,
  1226. it->node->iscompr ? it->node->size : 1)) return 0;
  1227. memcpy(&it->node,cp,sizeof(it->node));
  1228. /* Call the node callback if any, and replace the node pointer
  1229. * if the callback returns true. */
  1230. if (it->node_cb && it->node_cb(&it->node))
  1231. memcpy(cp,&it->node,sizeof(it->node));
  1232. /* For "next" step, stop every time we find a key along the
  1233. * way, since the key is lexicographically smaller compared to
  1234. * what follows in the sub-children. */
  1235. if (it->node->iskey) {
  1236. it->data = raxGetData(it->node);
  1237. return 1;
  1238. }
  1239. } else {
  1240. /* If we finished exploring the previous sub-tree, switch to the
  1241. * new one: go upper until a node is found where there are
  1242. * children representing keys lexicographically greater than the
  1243. * current key. */
  1244. while(1) {
  1245. int old_noup = noup;
  1246. /* Already on head? Can't go up, iteration finished. */
  1247. if (!noup && it->node == it->rt->head) {
  1248. it->flags |= RAX_ITER_EOF;
  1249. it->stack.items = orig_stack_items;
  1250. it->key_len = orig_key_len;
  1251. it->node = orig_node;
  1252. return 1;
  1253. }
  1254. /* If there are no children at the current node, try parent's
  1255. * next child. */
  1256. unsigned char prevchild = it->key[it->key_len-1];
  1257. if (!noup) {
  1258. it->node = raxStackPop(&it->stack);
  1259. } else {
  1260. noup = 0;
  1261. }
  1262. /* Adjust the current key to represent the node we are
  1263. * at. */
  1264. int todel = it->node->iscompr ? it->node->size : 1;
  1265. raxIteratorDelChars(it,todel);
  1266. /* Try visiting the next child if there was at least one
  1267. * additional child. */
  1268. if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
  1269. raxNode **cp = raxNodeFirstChildPtr(it->node);
  1270. int i = 0;
  1271. while (i < it->node->size) {
  1272. debugf("SCAN NEXT %c\n", it->node->data[i]);
  1273. if (it->node->data[i] > prevchild) break;
  1274. i++;
  1275. cp++;
  1276. }
  1277. if (i != it->node->size) {
  1278. debugf("SCAN found a new node\n");
  1279. raxIteratorAddChars(it,it->node->data+i,1);
  1280. if (!raxStackPush(&it->stack,it->node)) return 0;
  1281. memcpy(&it->node,cp,sizeof(it->node));
  1282. /* Call the node callback if any, and replace the node
  1283. * pointer if the callback returns true. */
  1284. if (it->node_cb && it->node_cb(&it->node))
  1285. memcpy(cp,&it->node,sizeof(it->node));
  1286. if (it->node->iskey) {
  1287. it->data = raxGetData(it->node);
  1288. return 1;
  1289. }
  1290. break;
  1291. }
  1292. }
  1293. }
  1294. }
  1295. }
  1296. }
  1297. /* Seek the greatest key in the subtree at the current node. Return 0 on
  1298. * out of memory, otherwise 1. This is a helper function for different
  1299. * iteration functions below. */
  1300. int raxSeekGreatest(raxIterator *it) {
  1301. while(it->node->size) {
  1302. if (it->node->iscompr) {
  1303. if (!raxIteratorAddChars(it,it->node->data,
  1304. it->node->size)) return 0;
  1305. } else {
  1306. if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1))
  1307. return 0;
  1308. }
  1309. raxNode **cp = raxNodeLastChildPtr(it->node);
  1310. if (!raxStackPush(&it->stack,it->node)) return 0;
  1311. memcpy(&it->node,cp,sizeof(it->node));
  1312. }
  1313. return 1;
  1314. }
  1315. /* Like raxIteratorNextStep() but implements an iteration step moving
  1316. * to the lexicographically previous element. The 'noup' option has a similar
  1317. * effect to the one of raxIteratorNextStep(). */
  1318. int raxIteratorPrevStep(raxIterator *it, int noup) {
  1319. if (it->flags & RAX_ITER_EOF) {
  1320. return 1;
  1321. } else if (it->flags & RAX_ITER_JUST_SEEKED) {
  1322. it->flags &= ~RAX_ITER_JUST_SEEKED;
  1323. return 1;
  1324. }
  1325. /* Save key len, stack items and the node where we are currently
  1326. * so that on iterator EOF we can restore the current key and state. */
  1327. size_t orig_key_len = it->key_len;
  1328. size_t orig_stack_items = it->stack.items;
  1329. raxNode *orig_node = it->node;
  1330. while(1) {
  1331. int old_noup = noup;
  1332. /* Already on head? Can't go up, iteration finished. */
  1333. if (!noup && it->node == it->rt->head) {
  1334. it->flags |= RAX_ITER_EOF;
  1335. it->stack.items = orig_stack_items;
  1336. it->key_len = orig_key_len;
  1337. it->node = orig_node;
  1338. return 1;
  1339. }
  1340. unsigned char prevchild = it->key[it->key_len-1];
  1341. if (!noup) {
  1342. it->node = raxStackPop(&it->stack);
  1343. } else {
  1344. noup = 0;
  1345. }
  1346. /* Adjust the current key to represent the node we are
  1347. * at. */
  1348. int todel = it->node->iscompr ? it->node->size : 1;
  1349. raxIteratorDelChars(it,todel);
  1350. /* Try visiting the prev child if there is at least one
  1351. * child. */
  1352. if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
  1353. raxNode **cp = raxNodeLastChildPtr(it->node);
  1354. int i = it->node->size-1;
  1355. while (i >= 0) {
  1356. debugf("SCAN PREV %c\n", it->node->data[i]);
  1357. if (it->node->data[i] < prevchild) break;
  1358. i--;
  1359. cp--;
  1360. }
  1361. /* If we found a new subtree to explore in this node,
  1362. * go deeper following all the last children in order to
  1363. * find the key lexicographically greater. */
  1364. if (i != -1) {
  1365. debugf("SCAN found a new node\n");
  1366. /* Enter the node we just found. */
  1367. if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0;
  1368. if (!raxStackPush(&it->stack,it->node)) return 0;
  1369. memcpy(&it->node,cp,sizeof(it->node));
  1370. /* Seek sub-tree max. */
  1371. if (!raxSeekGreatest(it)) return 0;
  1372. }
  1373. }
  1374. /* Return the key: this could be the key we found scanning a new
  1375. * subtree, or if we did not find a new subtree to explore here,
  1376. * before giving up with this node, check if it's a key itself. */
  1377. if (it->node->iskey) {
  1378. it->data = raxGetData(it->node);
  1379. return 1;
  1380. }
  1381. }
  1382. }
  1383. /* Seek an iterator at the specified element.
  1384. * Return 0 if the seek failed for syntax error or out of memory. Otherwise
  1385. * 1 is returned. When 0 is returned for out of memory, errno is set to
  1386. * the ENOMEM value. */
  1387. int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) {
  1388. int eq = 0, lt = 0, gt = 0, first = 0, last = 0;
  1389. it->stack.items = 0; /* Just resetting. Initialized by raxStart(). */
  1390. it->flags |= RAX_ITER_JUST_SEEKED;
  1391. it->flags &= ~RAX_ITER_EOF;
  1392. it->key_len = 0;
  1393. it->node = NULL;
  1394. /* Set flags according to the operator used to perform the seek. */
  1395. if (op[0] == '>') {
  1396. gt = 1;
  1397. if (op[1] == '=') eq = 1;
  1398. } else if (op[0] == '<') {
  1399. lt = 1;
  1400. if (op[1] == '=') eq = 1;
  1401. } else if (op[0] == '=') {
  1402. eq = 1;
  1403. } else if (op[0] == '^') {
  1404. first = 1;
  1405. } else if (op[0] == '$') {
  1406. last = 1;
  1407. } else {
  1408. errno = 0;
  1409. return 0; /* Error. */
  1410. }
  1411. /* If there are no elements, set the EOF condition immediately and
  1412. * return. */
  1413. if (it->rt->numele == 0) {
  1414. it->flags |= RAX_ITER_EOF;
  1415. return 1;
  1416. }
  1417. if (first) {
  1418. /* Seeking the first key greater or equal to the empty string
  1419. * is equivalent to seeking the smaller key available. */
  1420. return raxSeek(it,">=",NULL,0);
  1421. }
  1422. if (last) {
  1423. /* Find the greatest key taking always the last child till a
  1424. * final node is found. */
  1425. it->node = it->rt->head;
  1426. if (!raxSeekGreatest(it)) return 0;
  1427. assert(it->node->iskey);
  1428. it->data = raxGetData(it->node);
  1429. return 1;
  1430. }
  1431. /* We need to seek the specified key. What we do here is to actually
  1432. * perform a lookup, and later invoke the prev/next key code that
  1433. * we already use for iteration. */
  1434. int splitpos = 0;
  1435. size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack);
  1436. /* Return OOM on incomplete stack info. */
  1437. if (it->stack.oom) return 0;
  1438. if (eq && i == len && (!it->node->iscompr || splitpos == 0) &&
  1439. it->node->iskey)
  1440. {
  1441. /* We found our node, since the key matches and we have an
  1442. * "equal" condition. */
  1443. if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */
  1444. it->data = raxGetData(it->node);
  1445. } else if (lt || gt) {
  1446. /* Exact key not found or eq flag not set. We have to set as current
  1447. * key the one represented by the node we stopped at, and perform
  1448. * a next/prev operation to seek. */
  1449. raxIteratorAddChars(it, ele, i-splitpos);
  1450. /* We need to set the iterator in the correct state to call next/prev
  1451. * step in order to seek the desired element. */
  1452. debugf("After initial seek: i=%d len=%d key=%.*s\n",
  1453. (int)i, (int)len, (int)it->key_len, it->key);
  1454. if (i != len && !it->node->iscompr) {
  1455. /* If we stopped in the middle of a normal node because of a
  1456. * mismatch, add the mismatching character to the current key
  1457. * and call the iterator with the 'noup' flag so that it will try
  1458. * to seek the next/prev child in the current node directly based
  1459. * on the mismatching character. */
  1460. if (!raxIteratorAddChars(it,ele+i,1)) return 0;
  1461. debugf("Seek normal node on mismatch: %.*s\n",
  1462. (int)it->key_len, (char*)it->key);
  1463. it->flags &= ~RAX_ITER_JUST_SEEKED;
  1464. if (lt && !raxIteratorPrevStep(it,1)) return 0;
  1465. if (gt && !raxIteratorNextStep(it,1)) return 0;
  1466. it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
  1467. } else if (i != len && it->node->iscompr) {
  1468. debugf("Compressed mismatch: %.*s\n",
  1469. (int)it->key_len, (char*)it->key);
  1470. /* In case of a mismatch within a compressed node. */
  1471. int nodechar = it->node->data[splitpos];
  1472. int keychar = ele[i];
  1473. it->flags &= ~RAX_ITER_JUST_SEEKED;
  1474. if (gt) {
  1475. /* If the key the compressed node represents is greater
  1476. * than our seek element, continue forward, otherwise set the
  1477. * state in order to go back to the next sub-tree. */
  1478. if (nodechar > keychar) {
  1479. if (!raxIteratorNextStep(it,0)) return 0;
  1480. } else {
  1481. if (!raxIteratorAddChars(it,it->node->data,it->node->size))
  1482. return 0;
  1483. if (!raxIteratorNextStep(it,1)) return 0;
  1484. }
  1485. }
  1486. if (lt) {
  1487. /* If the key the compressed node represents is smaller
  1488. * than our seek element, seek the greater key in this
  1489. * subtree, otherwise set the state in order to go back to
  1490. * the previous sub-tree. */
  1491. if (nodechar < keychar) {
  1492. if (!raxSeekGreatest(it)) return 0;
  1493. it->data = raxGetData(it->node);
  1494. } else {
  1495. if (!raxIteratorAddChars(it,it->node->data,it->node->size))
  1496. return 0;
  1497. if (!raxIteratorPrevStep(it,1)) return 0;
  1498. }
  1499. }
  1500. it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
  1501. } else {
  1502. debugf("No mismatch: %.*s\n",
  1503. (int)it->key_len, (char*)it->key);
  1504. /* If there was no mismatch we are into a node representing the
  1505. * key, (but which is not a key or the seek operator does not
  1506. * include 'eq'), or we stopped in the middle of a compressed node
  1507. * after processing all the key. Continue iterating as this was
  1508. * a legitimate key we stopped at. */
  1509. it->flags &= ~RAX_ITER_JUST_SEEKED;
  1510. if (it->node->iscompr && it->node->iskey && splitpos && lt) {
  1511. /* If we stopped in the middle of a compressed node with
  1512. * perfect match, and the condition is to seek a key "<" than
  1513. * the specified one, then if this node is a key it already
  1514. * represents our match. For instance we may have nodes:
  1515. *
  1516. * "f" -> "oobar" = 1 -> "" = 2
  1517. *
  1518. * Representing keys "f" = 1, "foobar" = 2. A seek for
  1519. * the key < "foo" will stop in the middle of the "oobar"
  1520. * node, but will be our match, representing the key "f".
  1521. *
  1522. * So in that case, we don't seek backward. */
  1523. it->data = raxGetData(it->node);
  1524. } else {
  1525. if (gt && !raxIteratorNextStep(it,0)) return 0;
  1526. if (lt && !raxIteratorPrevStep(it,0)) return 0;
  1527. }
  1528. it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
  1529. }
  1530. } else {
  1531. /* If we are here just eq was set but no match was found. */
  1532. it->flags |= RAX_ITER_EOF;
  1533. return 1;
  1534. }
  1535. return 1;
  1536. }
  1537. /* Go to the next element in the scope of the iterator 'it'.
  1538. * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
  1539. * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
  1540. int raxNext(raxIterator *it) {
  1541. if (!raxIteratorNextStep(it,0)) {
  1542. errno = ENOMEM;
  1543. return 0;
  1544. }
  1545. if (it->flags & RAX_ITER_EOF) {
  1546. errno = 0;
  1547. return 0;
  1548. }
  1549. return 1;
  1550. }
  1551. /* Go to the previous element in the scope of the iterator 'it'.
  1552. * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
  1553. * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
  1554. int raxPrev(raxIterator *it) {
  1555. if (!raxIteratorPrevStep(it,0)) {
  1556. errno = ENOMEM;
  1557. return 0;
  1558. }
  1559. if (it->flags & RAX_ITER_EOF) {
  1560. errno = 0;
  1561. return 0;
  1562. }
  1563. return 1;
  1564. }
  1565. /* Perform a random walk starting in the current position of the iterator.
  1566. * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned
  1567. * and the iterator is set to the node reached after doing a random walk
  1568. * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed
  1569. * using a random number of steps between 1 and two times the logarithm of
  1570. * the number of elements.
  1571. *
  1572. * NOTE: if you use this function to generate random elements from the radix
  1573. * tree, expect a disappointing distribution. A random walk produces good
  1574. * random elements if the tree is not sparse, however in the case of a radix
  1575. * tree certain keys will be reported much more often than others. At least
  1576. * this function should be able to explore every possible element eventually. */
  1577. int raxRandomWalk(raxIterator *it, size_t steps) {
  1578. if (it->rt->numele == 0) {
  1579. it->flags |= RAX_ITER_EOF;
  1580. return 0;
  1581. }
  1582. if (steps == 0) {
  1583. size_t fle = 1+floor(log(it->rt->numele));
  1584. fle *= 2;
  1585. steps = 1 + rand() % fle;
  1586. }
  1587. raxNode *n = it->node;
  1588. while(steps > 0 || !n->iskey) {
  1589. int numchildren = n->iscompr ? 1 : n->size;
  1590. int r = rand() % (numchildren+(n != it->rt->head));
  1591. if (r == numchildren) {
  1592. /* Go up to parent. */
  1593. n = raxStackPop(&it->stack);
  1594. int todel = n->iscompr ? n->size : 1;
  1595. raxIteratorDelChars(it,todel);
  1596. } else {
  1597. /* Select a random child. */
  1598. if (n->iscompr) {
  1599. if (!raxIteratorAddChars(it,n->data,n->size)) return 0;
  1600. } else {
  1601. if (!raxIteratorAddChars(it,n->data+r,1)) return 0;
  1602. }
  1603. raxNode **cp = raxNodeFirstChildPtr(n)+r;
  1604. if (!raxStackPush(&it->stack,n)) return 0;
  1605. memcpy(&n,cp,sizeof(n));
  1606. }
  1607. if (n->iskey) steps--;
  1608. }
  1609. it->node = n;
  1610. it->data = raxGetData(it->node);
  1611. return 1;
  1612. }
  1613. /* Compare the key currently pointed by the iterator to the specified
  1614. * key according to the specified operator. Returns 1 if the comparison is
  1615. * true, otherwise 0 is returned. */
  1616. int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) {
  1617. int eq = 0, lt = 0, gt = 0;
  1618. if (op[0] == '=' || op[1] == '=') eq = 1;
  1619. if (op[0] == '>') gt = 1;
  1620. else if (op[0] == '<') lt = 1;
  1621. else if (op[1] != '=') return 0; /* Syntax error. */
  1622. size_t minlen = key_len < iter->key_len ? key_len : iter->key_len;
  1623. int cmp = memcmp(iter->key,key,minlen);
  1624. /* Handle == */
  1625. if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len;
  1626. /* Handle >, >=, <, <= */
  1627. if (cmp == 0) {
  1628. /* Same prefix: longer wins. */
  1629. if (eq && key_len == iter->key_len) return 1;
  1630. else if (lt) return iter->key_len < key_len;
  1631. else if (gt) return iter->key_len > key_len;
  1632. else return 0; /* Avoid warning, just 'eq' is handled before. */
  1633. } else if (cmp > 0) {
  1634. return gt ? 1 : 0;
  1635. } else /* (cmp < 0) */ {
  1636. return lt ? 1 : 0;
  1637. }
  1638. }
  1639. /* Free the iterator. */
  1640. void raxStop(raxIterator *it) {
  1641. if (it->key != it->key_static_string) rax_free(it->key);
  1642. raxStackFree(&it->stack);
  1643. }
  1644. /* Return if the iterator is in an EOF state. This happens when raxSeek()
  1645. * failed to seek an appropriate element, so that raxNext() or raxPrev()
  1646. * will return zero, or when an EOF condition was reached while iterating
  1647. * with raxNext() and raxPrev(). */
  1648. int raxEOF(raxIterator *it) {
  1649. return it->flags & RAX_ITER_EOF;
  1650. }
  1651. /* Return the number of elements inside the radix tree. */
  1652. uint64_t raxSize(rax *rax) {
  1653. return rax->numele;
  1654. }
  1655. /* ----------------------------- Introspection ------------------------------ */
  1656. /* This function is mostly used for debugging and learning purposes.
  1657. * It shows an ASCII representation of a tree on standard output, outline
  1658. * all the nodes and the contained keys.
  1659. *
  1660. * The representation is as follow:
  1661. *
  1662. * "foobar" (compressed node)
  1663. * [abc] (normal node with three children)
  1664. * [abc]=0x12345678 (node is a key, pointing to value 0x12345678)
  1665. * [] (a normal empty node)
  1666. *
  1667. * Children are represented in new indented lines, each children prefixed by
  1668. * the "`-(x)" string, where "x" is the edge byte.
  1669. *
  1670. * [abc]
  1671. * `-(a) "ladin"
  1672. * `-(b) [kj]
  1673. * `-(c) []
  1674. *
  1675. * However when a node has a single child the following representation
  1676. * is used instead:
  1677. *
  1678. * [abc] -> "ladin" -> []
  1679. */
  1680. /* The actual implementation of raxShow(). */
  1681. void raxRecursiveShow(int level, int lpad, raxNode *n) {
  1682. char s = n->iscompr ? '"' : '[';
  1683. char e = n->iscompr ? '"' : ']';
  1684. int numchars = printf("%c%.*s%c", s, n->size, n->data, e);
  1685. if (n->iskey) {
  1686. numchars += printf("=%p",raxGetData(n));
  1687. }
  1688. int numchildren = n->iscompr ? 1 : n->size;
  1689. /* Note that 7 and 4 magic constants are the string length
  1690. * of " `-(x) " and " -> " respectively. */
  1691. if (level) {
  1692. lpad += (numchildren > 1) ? 7 : 4;
  1693. if (numchildren == 1) lpad += numchars;
  1694. }
  1695. raxNode **cp = raxNodeFirstChildPtr(n);
  1696. for (int i = 0; i < numchildren; i++) {
  1697. char *branch = " `-(%c) ";
  1698. if (numchildren > 1) {
  1699. printf("\n");
  1700. for (int j = 0; j < lpad; j++) putchar(' ');
  1701. printf(branch,n->data[i]);
  1702. } else {
  1703. printf(" -> ");
  1704. }
  1705. raxNode *child;
  1706. memcpy(&child,cp,sizeof(child));
  1707. raxRecursiveShow(level+1,lpad,child);
  1708. cp++;
  1709. }
  1710. }
  1711. /* Show a tree, as outlined in the comment above. */
  1712. void raxShow(rax *rax) {
  1713. raxRecursiveShow(0,0,rax->head);
  1714. putchar('\n');
  1715. }
  1716. /* Used by debugnode() macro to show info about a given node. */
  1717. void raxDebugShowNode(const char *msg, raxNode *n) {
  1718. if (raxDebugMsg == 0) return;
  1719. printf("%s: %p [%.*s] key:%u size:%u children:",
  1720. msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size);
  1721. int numcld = n->iscompr ? 1 : n->size;
  1722. raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1);
  1723. while(numcld--) {
  1724. raxNode *child;
  1725. memcpy(&child,cldptr,sizeof(child));
  1726. cldptr++;
  1727. printf("%p ", (void*)child);
  1728. }
  1729. printf("\n");
  1730. fflush(stdout);
  1731. }
  1732. /* Touch all the nodes of a tree returning a check sum. This is useful
  1733. * in order to make Valgrind detect if there is something wrong while
  1734. * reading the data structure.
  1735. *
  1736. * This function was used in order to identify Rax bugs after a big refactoring
  1737. * using this technique:
  1738. *
  1739. * 1. The rax-test is executed using Valgrind, adding a printf() so that for
  1740. * the fuzz tester we see what iteration in the loop we are in.
  1741. * 2. After every modification of the radix tree made by the fuzz tester
  1742. * in rax-test.c, we add a call to raxTouch().
  1743. * 3. Now as soon as an operation will corrupt the tree, raxTouch() will
  1744. * detect it (via Valgrind) immediately. We can add more calls to narrow
  1745. * the state.
  1746. * 4. At this point a good idea is to enable Rax debugging messages immediately
  1747. * before the moment the tree is corrupted, to see what happens.
  1748. */
  1749. unsigned long raxTouch(raxNode *n) {
  1750. debugf("Touching %p\n", (void*)n);
  1751. unsigned long sum = 0;
  1752. if (n->iskey) {
  1753. sum += (unsigned long)raxGetData(n);
  1754. }
  1755. int numchildren = n->iscompr ? 1 : n->size;
  1756. raxNode **cp = raxNodeFirstChildPtr(n);
  1757. int count = 0;
  1758. for (int i = 0; i < numchildren; i++) {
  1759. if (numchildren > 1) {
  1760. sum += (long)n->data[i];
  1761. }
  1762. raxNode *child;
  1763. memcpy(&child,cp,sizeof(child));
  1764. if (child == (void*)0x65d1760) count++;
  1765. if (count > 1) exit(1);
  1766. sum += raxTouch(child);
  1767. cp++;
  1768. }
  1769. return sum;
  1770. }