RTree源代码——C语言实现
cheungmine
一、什么是RTree
“R树是B树向多维空间发展的另一种形式,它将空间对象按范围划分,每个结点都对应一个区域和一个磁盘页,非叶结点的磁盘页中存储其所有子结点的区域范围,非叶结点的所有子结点的区域都落在它的区域范围之内;叶结点的磁盘页中存储其区域范围之内的所有空间对象的外接矩形。每个结点所能拥有的子结点数目有上、下限,下限保证对磁盘空间的有效利用,上限保证每个结点对应一个磁盘页,当插入新的结点导致某结点要求的空间大于一个磁盘页时,该结点一分为二。R树是一种动态索引结构,即:它的查询可与插入或删除同时进行,而且不需要定期地对树结构进行重新组织。当更新一个关系并且在这个关系上正在做扫描时,如果更新影响到了扫描操作的任何一个页,我们需要检查扫描并且修复它。”
其实上面的话,你也不用多做研究。理解RTree是范围树,适合做空间索引(快速查找)。更多的关于RTree的知识我也没时间写在这里,我只知道原理,然后提供了下面的代码(经过我修改,看起来更顺眼些)。一定要用它。感谢算法的原作者和发明算法的人。“帝国大厦的建立,人才更在资本之上”啊!
二、RTree的实现代码
本文的代码来源于GRASS,我根据自己的习惯,作了适当的修改,把原来多个文件合成了2个文件(rtree.h和rtree.c)。本文提供了完整的rtree实现代码和一个简单的测试代码(test.c)。如果你发现什么问题,请及时提交评论,以利改正。
RTree.h文件:
* RTree.H
*
* MODULE: R-Tree library
*
* AUTHOR(S): Antonin Guttman - original code
* Daniel Green (green@superliminal.com) - major clean-up
* and implementation of bounding spheres
*
* PURPOSE: Multi Dimensional Index
*
* COPYRIGHT: (C) 2001 by the GRASS Development Team
*
* This program is free software under the GNU General Public
* License (>=v2). Read the file COPYING that comes with GRASS
* for details.
*
* LAST MODIFY: ZhangLiang (cheungmine@gmail.com) - 2007-11
*****************************************************************************/
#ifndef RTREE_H_INCLUDED
#define RTREE_H_INCLUDED /* PAGE_SIZE is normally the natural page size of the machine */
#define PAGE_SIZE 512
#define DIMS_NUMB 3 /* number of dimensions */
#define SIDES_NUMB 2*DIMS_NUMB /* typedef float REALTYPE; */
typedef double REALTYPE;
#ifndef TRUE
#define FALSE 0
#endif
typedef
struct _RTREEMBR{
REALTYPE bound[SIDES_NUMB]; /* xmin,ymin,...,xmax,ymax,... */
}RTREEMBR;
typedef
struct _RTREEBRANCH{
RTREEMBR mbr;
struct _RTREENODE *child; /* mbr id */
}RTREEBRANCH; /* max branching factor of a node */
#define MAXCARD (int)((PAGE_SIZE-(2*sizeof(int))) / sizeof(RTREEBRANCH))
typedef
struct _RTREENODE{
int count;
int level; /* 0 is leaf, others positive */
RTREEBRANCH branch[MAXCARD];
}RTREENODE;
typedef
struct _RTREELISTNODE{
struct _RTREELISTNODE *next;
RTREENODE *node;
}RTREELISTNODE; /*
* If passed to a tree search, this callback function will be called
* with the ID of each data mbr that overlaps the search mbr
* plus whatever user specific pointer was passed to the search.
* It can terminate the search early by returning 0 in which case
* the search will return the number of hits found up to that point.
*/
typedef int (*pfnSearchHitCallback)(int id, void* pfnParam); int RTreeSetNodeMax(int new_max); int RTreeSetLeafMax(int new_max); int RTreeGetNodeMax(void); int RTreeGetLeafMax(void); /**
* Initialize a rectangle to have all 0 coordinates.
*/
void RTreeInitRect( RTREEMBR *rc); /**
* Return a mbr whose first low side is higher than its opposite side -
* interpreted as an undefined mbr.
*/
RTREEMBR RTreeNullRect(void); /**
* Print out the data for a rectangle.
*/
void RTreePrintRect( RTREEMBR *rc, int depth ); /**
* Calculate the 2-dimensional area of a rectangle
*/
REALTYPE RTreeRectArea( RTREEMBR *rc ); /**
* Calculate the n-dimensional volume of a rectangle
*/
REALTYPE RTreeRectVolume( RTREEMBR *rc ); /**
* Calculate the n-dimensional volume of the bounding sphere of a rectangle
* The exact volume of the bounding sphere for the given RTREEMBR.
*/
REALTYPE RTreeRectSphericalVolume( RTREEMBR *rc ); /**
* Calculate the n-dimensional surface area of a rectangle
*/
REALTYPE RTreeRectSurfaceArea( RTREEMBR *rc ); /**
* Combine two rectangles, make one that includes both.
*/
RTREEMBR RTreeCombineRect( RTREEMBR *rc1, RTREEMBR *rc2 ); /**
* Decide whether two rectangles overlap.
*/
int RTreeOverlap( RTREEMBR *rc1, RTREEMBR *rc2); /**
* Decide whether rectangle r is contained in rectangle s.
*/
int RTreeContained( RTREEMBR *r, RTREEMBR *s); /**
* Split a node.
* Divides the nodes branches and the extra one between two nodes.
* Old node is one of the new ones, and one really new one is created.
* Tries more than one method for choosing a partition, uses best result.
*/
void RTreeSplitNode( RTREENODE *node, RTREEBRANCH *br, RTREENODE **new_node); /**
* Initialize a RTREENODE structure.
*/
void RTreeInitNode( RTREENODE *node ); /**
* Make a new node and initialize to have all branch cells empty.
*/
RTREENODE *RTreeNewNode(void); void RTreeFreeNode( RTREENODE *node ); /**
* Print out the data in a node.
*/
void RTreePrintNode( RTREENODE *node, int depth ); /**
* Find the smallest rectangle that includes all rectangles in branches of a node.
*/
RTREEMBR RTreeNodeCover( RTREENODE *node ); /**
* Pick a branch. Pick the one that will need the smallest increase
* in area to accomodate the new rectangle. This will result in the
* least total area for the covering rectangles in the current node.
* In case of a tie, pick the one which was smaller before, to get
* the best resolution when searching.
*/
int RTreePickBranch( RTREEMBR *rc, RTREENODE *node); /**
* Add a branch to a node. Split the node if necessary.
* Returns 0 if node not split. Old node updated.
* Returns 1 if node split, sets *new_node to address of new node.
* Old node updated, becomes one of two.
*/
int RTreeAddBranch( RTREEBRANCH *br, RTREENODE *node, RTREENODE **new_node); /**
* Disconnect a dependent node.
*/
void RTreeDisconnectBranch( RTREENODE *node, int i ); /**
* Destroy (free) node recursively.
*/
void RTreeDestroyNode ( RTREENODE *node ); /**
* Create a new rtree index, empty. Consists of a single node.
*/
RTREENODE * RTreeCreate(void); /**
* Destroy a rtree root must be a root of rtree. Free all memory.
*/
void RTreeDestroy(RTREENODE *root); /**
* Search in an index tree or subtree for all data rectangles that overlap the argument rectangle.
* Return the number of qualifying data rects.
*/
int RTreeSearch( RTREENODE *node, RTREEMBR *rc, pfnSearchHitCallback pfnSHCB, void* pfnParam); /**
* Insert a data rectangle into an index structure.
* RTreeInsertRect provides for splitting the root;
* returns 1 if root was split, 0 if it was not.
* The level argument specifies the number of steps up from the leaf
* level to insert; e.g. a data rectangle goes in at level = 0.
* _RTreeInsertRect does the recursion.
*/
int RTreeInsertRect( RTREEMBR *rc, int tid, RTREENODE **root, int level); /**
* Delete a data rectangle from an index structure.
* Pass in a pointer to a RTREEMBR, the tid of the record, ptr to ptr to root node.
* Returns 1 if record not found, 0 if success.
* RTreeDeleteRect provides for eliminating the root.
*/
int RTreeDeleteRect( RTREEMBR *rc, int tid, RTREENODE **root); #endif /* RTREE_H_INCLUDED */
RTree.C文件:
* RTree.C
*
* MODULE: R-Tree library
*
* AUTHOR(S): Antonin Guttman - original code
* Daniel Green (green@superliminal.com) - major clean-up
* and implementation of bounding spheres
*
* PURPOSE: Multi Dimensional Index
*
* COPYRIGHT: (C) 2001 by the GRASS Development Team
*
* This program is free software under the GNU General Public
* License (>=v2). Read the file COPYING that comes with GRASS
* for details.
*
* LAST MODIFY: ZhangLiang (cheungmine@gmail.com) - 2007-11
*****************************************************************************/
#include
<stdio.h>#include <stdlib.h>
#include <assert.h>
#include <float.h>
#include <math.h>
#include
"rtree.h" #define METHODS 1 /* variables for finding a partition */typedef struct _RTREEPARTITION
{
int partition[MAXCARD+1];
int total;
int minfill;
int taken[MAXCARD+1];
int count[2];
RTREEMBR cover[2];
REALTYPE area[2];
} RTREEPARTITION;
RTREEBRANCH BranchBuf[MAXCARD
+1];int BranchCount;
RTREEMBR CoverSplit;
REALTYPE CoverSplitArea;
RTREEPARTITION Partitions[METHODS]; #define BIG_NUM (FLT_MAX/4.0) #define INVALID_RECT(x) ((x)->bound[0] > (x)->bound[DIMS_NUMB])
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b)) int NODECARD = MAXCARD;
int LEAFCARD = MAXCARD; /* balance criteria for node splitting */
/* NOTE: can be changed if needed. */
#define MINNODEFILL (NODECARD / 2)
#define MINLEAFFILL (LEAFCARD / 2) #define MAXKIDS(n) ((n)->level > 0 ? NODECARD : LEAFCARD)
#define MINFILL(n) ((n)->level > 0 ? MINNODEFILL : MINLEAFFILL) static int set_max(int *which, int new_max)
{
if(2 > new_max || new_max > MAXCARD)
return 0;
*which = new_max;
return 1;
} /**
* Load branch buffer with branches from full node plus the extra branch.
*/
static void _RTreeGetBranches( RTREENODE *node, RTREEBRANCH *br)
{
int i;
assert(node
&& br);/* load the branch buffer */
for (i=0; i<MAXKIDS(node); i++)
{
assert(node->branch[i].child); /* n should have every entry full */
BranchBuf[i] = node->branch[i];
}
BranchBuf[MAXKIDS(node)]
= *br;BranchCount = MAXKIDS(node) + 1;
/* calculate mbr containing all in the set */
CoverSplit = BranchBuf[0].mbr;
for (i=1; i<MAXKIDS(node)+1; i++)
{
CoverSplit = RTreeCombineRect(&CoverSplit, &BranchBuf[i].mbr);
}
CoverSplitArea
= RTreeRectSphericalVolume(&CoverSplit);RTreeInitNode(node);
} /**
* Put a branch in one of the groups.
*/
static void _RTreeClassify(int i, int group, RTREEPARTITION *p)
{
assert(p);
assert(!p->taken[i]);
p
->partition[i] = group;p->taken[i] = TRUE;
if (p->count[group] == 0)
p->cover[group] = BranchBuf[i].mbr;
else
p->cover[group] = RTreeCombineRect(&BranchBuf[i].mbr, &p->cover[group]);
p->area[group] = RTreeRectSphericalVolume(&p->cover[group]);
p->count[group]++;
} /**
* Pick two rects from set to be the first elements of the two groups.
* Pick the two that waste the most area if covered by a single rectangle.
*/
static void _RTreePickSeeds(RTREEPARTITION *p)
{
int i, j, seed0=0, seed1=0;
REALTYPE worst, waste, area[MAXCARD+1];
for (i=0; i<p->total; i++)
area[i] = RTreeRectSphericalVolume(&BranchBuf[i].mbr);
worst = -CoverSplitArea - 1;
for (i=0; i<p->total-1; i++)
{
for (j=i+1; j<p->total; j++)
{
RTREEMBR one_rect;
one_rect = RTreeCombineRect(&BranchBuf[i].mbr, &BranchBuf[j].mbr);
waste = RTreeRectSphericalVolume(&one_rect) - area[i] - area[j];
if (waste > worst)
{
worst = waste;
seed0 = i;
seed1 = j;
}
}
}
_RTreeClassify(seed0, 0, p);
_RTreeClassify(seed1, 1, p);
} /**
* Copy branches from the buffer into two nodes according to the partition.
*/
static void _RTreeLoadNodes( RTREENODE *n, RTREENODE *q, RTREEPARTITION *p)
{
int i;
assert(n && q && p);
for (i=0; i<p->total; i++)
{
assert(p->partition[i] == 0 || p->partition[i] == 1);
if (p->partition[i] == 0)
RTreeAddBranch(&BranchBuf[i], n, NULL);
else if (p->partition[i] == 1)
RTreeAddBranch(&BranchBuf[i], q, NULL);
}
} /**
* Initialize a RTREEPARTITION structure.
*/
static void _RTreeInitPart( RTREEPARTITION *p, int maxrects, int minfill)
{
int i;
assert(p);
p
->count[0] = p->count[1] = 0;p->cover[0] = p->cover[1] = RTreeNullRect();
p->area[0] = p->area[1] = (REALTYPE)0;
p->total = maxrects;
p->minfill = minfill;
for (i=0; i<maxrects; i++)
{
p->taken[i] = FALSE;
p->partition[i] = -1;
}
} /**
* Print out data for a partition from RTREEPARTITION struct.
*/
static void _RTreePrintPart( RTREEPARTITION *p)
{
int i;
assert(p);
fprintf (stdout, " partition: ");
for (i=0; i<p->total; i++)
{
fprintf (stdout, "%3d ", i);
}
fprintf (stdout, " ");
for (i=0; i<p->total; i++)
{
if (p->taken[i])
fprintf (stdout, " t ");
else
fprintf (stdout, " ");
}
fprintf (stdout, " ");
for (i=0; i<p->total; i++)
{
fprintf (stdout, "%3d ", p->partition[i]);
}
fprintf (stdout, " ");
fprintf (stdout,
"count[0] = %d area = %f ", p->count[0], p->area[0]);fprintf (stdout, "count[1] = %d area = %f ", p->count[1], p->area[1]);
if (p->area[0] + p->area[1] > 0)
{
fprintf (stdout, "total area = %f effectiveness = %3.2f ",
p->area[0] + p->area[1], (float)CoverSplitArea / (p->area[0] + p->area[1]));
}
fprintf (stdout, "cover[0]: ");
RTreePrintRect(&p->cover[0], 0);
fprintf (stdout,
"cover[1]: ");RTreePrintRect(&p->cover[1], 0);
} /**
* Method #0 for choosing a partition:
* As the seeds for the two groups, pick the two rects that would waste the
* most area if covered by a single rectangle, i.e. evidently the worst pair
* to have in the same group.
* Of the remaining, one at a time is chosen to be put in one of the two groups.
* The one chosen is the one with the greatest difference in area expansion
* depending on which group - the mbr most strongly attracted to one group
* and repelled from the other.
* If one group gets too full (more would force other group to violate min
* fill requirement) then other group gets the rest.
* These last are the ones that can go in either group most easily.
*/
static void _RTreeMethodZero( RTREEPARTITION *p, int minfill )
{
int i;
REALTYPE biggestDiff;
int group, chosen=0, betterGroup=0;
assert(p);
_RTreeInitPart(p, BranchCount, minfill);
_RTreePickSeeds(p);
while (p->count[0] + p->count[1] < p->total &&
p->count[0] < p->total - p->minfill &&
p->count[1] < p->total - p->minfill)
{
biggestDiff = (REALTYPE)-1.;
for (i=0; i<p->total; i++)
{
if (!p->taken[i])
{
RTREEMBR *r, rect_0, rect_1;
REALTYPE growth0, growth1, diff;
r = &BranchBuf[i].mbr;
rect_0 = RTreeCombineRect(r, &p->cover[0]);
rect_1 = RTreeCombineRect(r, &p->cover[1]);
growth0 = RTreeRectSphericalVolume(&rect_0) - p->area[0];
growth1 = RTreeRectSphericalVolume(&rect_1) - p->area[1];
diff = growth1 - growth0;
if (diff >= 0)
group = 0;
else
{
group = 1;
diff = -diff;
}
if (diff > biggestDiff)
{
biggestDiff = diff;
chosen = i;
betterGroup = g