【Redis】初探dict字典原理的实现(一)

现在的位置: 首页 > 综合 > 正文

【Redis】初探dict字典原理的实现(一)

2017年10月27日 ⁄ 综合 ⁄ 共 18713字 ⁄ 字号小中大 ⁄ 评论关闭

先简单阅读了源码，学到一些对我来说是比较新鲜的概念(比如双table,fingerprint等)。以下是在源码基础上的一些注释翻译，及个人的理解；稍后会用图示的方式来说明一下dict的各种实现原理。

dict.h

/* Hash表实现.*/
#include <stdint.h>
#ifndef __DICT_H
#define __DICT_H
#define DICT_OK 0
#define DICT_ERR 1
/* Unused arguments generate annoying warnings... */
#define DICT_NOTUSED(V) ((void) V)
/* 字典entry */
typedef struct dictEntry {
    void *key;                        // 键
    union {                           // union, 与内存优化相关
        void *val;                        // 指向除int类型之外的值
        uint64_t u64;                     // 64位unsigned整型
        int64_t s64;                      // 64位signed整型
    } v;
    struct dictEntry *next;          // next dictEntry
} dictEntry;

/* 字典类型 */
typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);                                     // 含key的hash函数
    void *(*keyDup)(void *privdata, const void *key);                                  // key的拷贝函数
    void *(*valDup)(void *privdata, const void *obj);                                  // value的拷贝函数
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);  // 对key的compare函数
    void (*keyDestructor)(void *privdata, void *key);                                  // key析构
    void (*valDestructor)(void *privdata, void *obj);                                  // value析构
} dictType;
/* hashtable结构. 每个字典包含两个ht, 用于逐步实现rehash, 从旧的ht拓展到新的ht. */
typedef struct dictht {
    dictEntry **table;                 // 字典节点
    unsigned long size;                // size
    unsigned long sizemask;            // size-1
    unsigned long used;                // 已有值的table项的数量
} dictht;
/* 字典自身 */
typedef struct dict {
    dictType *type;           // dictType
    void *privdata;           // 存储额外的信息
    dictht ht[2];             // ht[0]为主hash表,ht[1]为副hash表;可实现动态扩容
    int rehashidx;            // 表示数据动态迁移的进行位置， 为-1时说明没有进行rehash
    int iterators;            //当前存在的迭代器dictIterator的数量 
} dict;
/* 如果safe值为1，意味着在迭代过程中可以安全调用
* dictAdd, dictFind及其他一些操作方法,
* 否则只有调用dictFind()方法是安全的 */
typedef struct dictIterator {
    dict *d;
    int table, index, safe;
    dictEntry *entry, *nextEntry;
    long long fingerprint; /* unsafe iterator fingerprint for misuse detection */
} dictIterator;
typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
/* 每个hash table的初始化长度 */
#define DICT_HT_INITIAL_SIZE 4
/* ------------------------------- 宏 ------------------------------------*/

/* 释放字典的value */
#define dictFreeVal(d, entry) \
if ((d)->type->valDestructor) \
(d)->type->valDestructor((d)->privdata, (entry)->v.val)
/* 设置字典的值 */
#define dictSetVal(d, entry, _val_) do { \
if ((d)->type->valDup) \
entry->v.val = (d)->type->valDup((d)->privdata, _val_); \
else \
entry->v.val = (_val_); \
} while(0)
/* 设置字典值，类型为signed int */
#define dictSetSignedIntegerVal(entry, _val_) \
do { entry->v.s64 = _val_; } while(0)
/* 设置字典值, 类型为unsigned int */
#define dictSetUnsignedIntegerVal(entry, _val_) \
do { entry->v.u64 = _val_; } while(0)
/* 释放字典key */
#define dictFreeKey(d, entry) \
if ((d)->type->keyDestructor) \
(d)->type->keyDestructor((d)->privdata, (entry)->key)
/* 设置字典key */
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        entry->key = (d)->type->keyDup((d)->privdata, _key_); \
    else \
        entry->key = (_key_); \
} while(0)
/* compare字典key */
#define dictCompareKeys(d, key1, key2) \
(((d)->type->keyCompare) ? \
(d)->type->keyCompare((d)->privdata, key1, key2) : \
(key1) == (key2))
#define dictHashKey(d, key) (d)->type->hashFunction(key)        // 获取hash key
#define dictGetKey(he) ((he)->key)                              // 获取字典key
#define dictGetVal(he) ((he)->v.val)                            //  获取字典val
#define dictGetSignedIntegerVal(he) ((he)->v.s64)               // 获取字典signed int值 
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)             // 获取字典unsigned int 值
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)          // 获取字典占用的slots空间
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)           // 获取实际使用的空间大小
#define dictIsRehashing(ht) ((ht)->rehashidx != -1)             // 检测是否在rehashing
/* API */
dict *dictCreate(dictType *type, void *privDataPtr);            // 创建字典
int dictExpand(dict *d, unsigned long size);                    // 字典扩展
int dictAdd(dict *d, void *key, void *val);                     // 添加键值对        
dictEntry *dictAddRaw(dict *d, void *key);                      // 由dictAdd调用，是添加元素的底层实现
int dictReplace(dict *d, void *key, void *val);                 // 添加或替换键值对
dictEntry *dictReplaceRaw(dict *d, void *key);                  // 由dictReplace调用,是更新操作的底层实现
int dictDelete(dict *d, const void *key);                       // 删除元素
int dictDeleteNoFree(dict *d, const void *key);                 // 删除元素，但是不删除key
void dictRelease(dict *d);                                      // 清空并释放字典
dictEntry * dictFind(dict *d, const void *key);                 // 给定key查找字典节点
void *dictFetchValue(dict *d, const void *key);                 // 查找给定键的值
int dictResize(dict *d);                                        // 重置字典空间(缩小)
dictIterator *dictGetIterator(dict *d);                         // 获取字典迭代器(只进行next(), 不影响rehash)
dictIterator *dictGetSafeIterator(dict *d);                     // 获取字典迭代器(可进行更新操作)
dictEntry *dictNext(dictIterator *iter);                        // 获取迭代器的下一个元素
void dictReleaseIterator(dictIterator *iter);                   // 释放迭代器对象
dictEntry *dictGetRandomKey(dict *d);                           // 随机返回字典key
void dictPrintStats(dict *d);                                     // 打印状态信息
unsigned int dictGenHashFunction(const void *key, int len);       // 通用的hash函数
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len); // 
void dictEmpty(dict *d, void(callback)(void*));                          // 清空及重置字典(不释放空间)
void dictEnableResize(void);                                             // 启用字典的resize
void dictDisableResize(void);                                            // 金庸字典的resize
int dictRehash(dict *d, int n);                                          // 给定歩数下进行rehash
int dictRehashMilliseconds(dict *d, int ms);                             // 给定毫秒数内进行rehash
void dictSetHashFunctionSeed(unsigned int initval);                      // 设置hash函数seed
unsigned int dictGetHashFunctionSeed(void);                              // 获取hash函数seed
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata); // 字典遍历
/* Hash table 类型 */
extern dictType dictTypeHeapStringCopyKey;                              
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;
#endif /* __DICT_H */

dict.c

/* Hash Tables Implementation. */
#include "fmacros.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/time.h>
#include <ctype.h>
#include "dict.h"
#include "zmalloc.h"
#include "redisassert.h"
/*通过dictEnableResize() / dictDisableResize()方法我们可以启用/禁用ht空间重新分配. 
* 这对于Redis来说很重要, 因为我们用的是写时复制机制而且不想在子进程执行保存操作时移动过多的内存.
*
* 需要注意的是，即使dict_can_resize设置为0, 并不意味着所有的resize操作都被禁止:
* 一个a hash table仍然可以拓展空间，如果bucket与element之间的比例  > dict_force_resize_ratio。
*/
static int dict_can_resize = 1;
static unsigned int dict_force_resize_ratio = 5;
/* -------------------------- 私有原型 ---------------------------- */
static int _dictExpandIfNeeded(dict *ht);
static unsigned long _dictNextPower(unsigned long size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
/* -------------------------- hash方法 -------------------------------- */
//  xiaomo: 对hash方法的原理没有深究, 基本保留原来的注释
/* Thomas Wang's 32 bit Mix Function */
unsigned int dictIntHashFunction(unsigned int key)
{
    key += ~(key << 15);
    key ^= (key >> 10);
    key += (key << 3);
    key ^= (key >> 6);
    key += ~(key << 11);
    key ^= (key >> 16);
    return key;
}
/* 使用整型keys来标识hash方法*/
unsigned int dictIdentityHashFunction(unsigned int key)
{
    return key;
}
static uint32_t dict_hash_function_seed = 5381;
void dictSetHashFunctionSeed(uint32_t seed) {
    dict_hash_function_seed = seed;
}
uint32_t dictGetHashFunctionSeed(void) {
    return dict_hash_function_seed;
}
/* MurmurHash2, by Austin Appleby
* Note - 以下代码对你的机器做了一些假设:
* 1. 从任何空间地址读取4byte的数据都不会造成crash
* 2. sizeof(int) == 4
*
* 同时它有一些限制-
*
* 1. 它不是增量运行的.
* 2. 在大端/小端的机器上不会产生相同的结果.
*/
unsigned int dictGenHashFunction(const void *key, int len) {  
    /* 'm' and 'r' are mixing constants generated offline.
       They're not really 'magic', they just happen to work well. */
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;
    /* Initialize the hash to a 'random' value */
    uint32_t h = seed ^ len;
    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = (const unsigned char *)key;
    while(len >= 4) {
        uint32_t k = *(uint32_t*)data;
        k *= m;
        k ^= k >> r;
        k *= m;
        h *= m;
        h ^= k;
        data += 4;
        len -= 4;
    }
    /* Handle the last few bytes of the input array */
    switch(len) {
        case 3: h ^= data[2] << 16;
        case 2: h ^= data[1] << 8;
        case 1: h ^= data[0]; h *= m;
    };
    /* Do a few final mixes of the hash to ensure the last few
* bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;
    return (unsigned int)h;
}
/* And a case insensitive hash function (based on djb hash) */
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = (unsigned int)dict_hash_function_seed;
    while (len--)
    hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
    return hash;
}
/* ----------------------------- API implementation ------------------------- */
/* 重置已经调用过ht_init()来初始化的hash table.
* NOTE: 该方法应当只由 ht_destroy()来调用. */
static void _dictReset(dictht *ht)
{
    ht->table = NULL;
    ht->size = 0;
    ht->sizemask = 0;
    ht->used = 0;
}
/* 创建新的hashtable*/
dict *dictCreate(dictType *type,
void *privDataPtr)
{
    dict *d = zmalloc(sizeof(*d));
    _dictInit(d,type,privDataPtr);
    return d;
}
/* 初始化hashtable */
int _dictInit(dict *d, dictType *type, void *privDataPtr)
{
    _dictReset(&d->ht[0]);
    _dictReset(&d->ht[1]);
    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1;
    d->iterators = 0;
    return DICT_OK;
}
/* 根据USED/BUCKETS(<=1)的比值来重置table空间大小，足够小并且能存储所有的element.*/
int dictResize(dict *d)
{
    int minimal;
    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
    minimal = d->ht[0].used;
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;
    return dictExpand(d, minimal);
}
/* 创建或扩展hash table */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);
    /* 若给定大小小于当前hashtable的元素数量时,将返回错误*/
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;
    /* 给ht分配空间并将所有指针置为NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;
    /* 如果是首次初始化，那么就不是rehash了，ht[0]指向的就是刚才刚才创建的dictht. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }
    /* 从表ht[1]设置为刚创建的dictht，rehashidx设置为0,准备进行渐进式rehash */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}
/* 执行N步渐进式rehash. 如果还有key需要迁移则返回1,否则返回0.
* Note that a rehashing step consists in moving a bucket (that may have more
* than one key as we use chaining) from the old to the new hash table. */
int dictRehash(dict *d, int n) {
if (!dictIsRehashing(d)) return 0;
    while(n--) {
        dictEntry *de, *nextde;
        /* 检查是否已经对整个表进行了rehash */
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1];
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }
        /* rehashidx标记的是当前rehash进行到ht[0]的哪个索引位置，因此要防止它超过ht[0].size而越界 */
        assert(d->ht[0].size > (unsigned)d->rehashidx);
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        de = d->ht[0].table[d->rehashidx];                                                  // de为ht[0]中第一个不为空的dictentry
        /* 迁移该bucket中所有的key*/
        while(de) {
            unsigned int h;
            nextde = de->next;
            /* 获取新hashtable中的key */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;      // ?
            de->next = d->ht[1].table[h];                  // 在bukect对应链表的头节点前插入de
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    return 1;
}


long long timeInMilliseconds(void) {
    struct timeval tv;
    gettimeofday(&tv,NULL);
    return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
}
/* 常规执行rehash任务，持续时间ms~ms+1毫秒 */
int dictRehashMilliseconds(dict *d, int ms) {
    long long start = timeInMilliseconds();
    int rehashes = 0;
    while(dictRehash(d,100)) {
        rehashes += 100;
        if (timeInMilliseconds()-start > ms) break;
    }
    return rehashes;
}
/* 当ht处在rehashing状态时,如果存在iterator操作的话会由于无法混合这两个ht的数据而导致数据丢失或重复.
*
* 在执行查询或更新操作时, 符合rehash条件下会触发rehash操作，而且只执行一步(渐进式). */
static void _dictRehashStep(dict *d) {
    if (d->iterators == 0) dictRehash(d,1);
}
/* 添加一对新键值到hashtable */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *entry = dictAddRaw(d,key);
    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
}
/* add操作的底层实现. 该方法直接增加一个entry对象, 而不是在已有的entry中修改值并返回给调用者.
*
* 该方法直接暴露给user API来调用。For example:
*
* entry = dictAddRaw(dict,mykey);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
* 如果key已存在则返回NULL.
* 如果key添加成功则返回可供调用者进行操作的dict entry.
*/
dictEntry *dictAddRaw(dict *d, void *key)
{
    int index;
    dictEntry *entry;
    dictht *ht;
    if (dictIsRehashing(d)) _dictRehashStep(d);
    /* 获取新元素的index值, -1表示该元素已存在. */
    if ((index = _dictKeyIndex(d, key)) == -1)
        return NULL;
    /* 分配空间并存储新创建的entry */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];   // 如果处在rehashing状态, 使用从表ht[1]
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;
    /* Set the hash entry fields. */
    dictSetKey(d, entry, key);
    return entry;
}
/* 添加或替换已存在的kv,.
* 返回1表示kv被添加成功, 返回0表示执行的是更新操作并已完成. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *entry, auxentry;
    /* 首先尝试执行dictAdd操作，如果成功则返回1. */
    if (dictAdd(d, key, val) == DICT_OK)
        return 1;
    /* 如果已存在key，则获取dictentry */
    entry = dictFind(d, key);
    /* 重新对key赋值并释放旧的值. 考虑到value可能是同一个, 所以要遵守以下的执行顺序. 这种情况下要考虑对象的引用计数先increment (set),
    *然后才是decrement (free), 不要弄反. */
    auxentry = *entry;
    dictSetVal(d, entry, val);
    dictFreeVal(d, &auxentry);
    return 0;
}
/* dictReplaceRaw() 可看作dictAddRaw()的一个延伸方法,它总是返回给定key的dictentry, 即使key已存在而不能再执行add(
*  此时会返回已存在的key对应的dictentry.)
*
* 详细请参考dictAddRaw(). */
dictEntry *dictReplaceRaw(dict *d, void *key) {
    dictEntry *entry = dictFind(d,key);
    return entry ? entry : dictAddRaw(d,key);
}
/* 查找并删除给定key对应的元素 */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
    unsigned int h, idx;
    dictEntry *he, *prevHe;
    int table;
    if (d->ht[0].size == 0) return DICT_ERR;   /* d->ht[0].table为NULL */
    if (dictIsRehashing(d)) _dictRehashStep(d);        // 触发rehash
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        prevHe = NULL;
        while(he) {
            if (dictCompareKeys(d, key, he->key)) {
                /* 从列表中删除该元素 */
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                if (!nofree) {                               // 同时释放key及value空间
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                }
                zfree(he);
                d->ht[table].used--;
                return DICT_OK;
            }
            prevHe = he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return DICT_ERR; /* 给定key不存在 */
}
int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0);               // 默认释放老键值对的空间
}
int dictDeleteNoFree(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,1);                   // 不释放键值对空间
}
/*销毁整个字典对象 */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
    unsigned long i;
    /* 释放全部元素 */
    for (i = 0; i < ht->size && ht->used > 0; i++) {
        dictEntry *he, *nextHe;
        if (callback && (i & 65535) == 0) callback(d->privdata);
        if ((he = ht->table[i]) == NULL) continue;
        while(he) {
            nextHe = he->next;
            dictFreeKey(d, he);
            dictFreeVal(d, he);
            zfree(he);
            ht->used--;
            he = nextHe;
        }
    }
    /* 释放table及缓存的结构体 */
    zfree(ht->table);
    /* 重新给ht分配空间 */
    _dictReset(ht);
    return DICT_OK; /* never fails */
}
/* 清除并释放hash table */
void dictRelease(dict *d)
{
    _dictClear(d,&d->ht[0],NULL);
    _dictClear(d,&d->ht[1],NULL);
    zfree(d);
}
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    unsigned int h, idx, table;
    if (d->ht[0].size == 0) return NULL;   /* 没有table数据, 返回NULL */
    if (dictIsRehashing(d)) _dictRehashStep(d);       // 触发并执行一步rehash
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) return NULL;     // 如果不是rehashing状态,也就没有必要遍历ht[1]了
    }
    return NULL;
}
void *dictFetchValue(dict *d, const void *key) {         //给定key获取value
    dictEntry *he;
    he = dictFind(d,key);
    return he ? dictGetVal(he) : NULL;
}
/* 一个fingerprint为一个64位数值,用以表示某个时刻dict的状态, 它由dict的一些属性通过位操作计算得到.
* 场景: 一个非安全迭代器初始后, 会产生一个fingerprint值. 在该迭代器被释放时会重新检查这个fingerprint值.
* 如果前后两个fingerprint值不一致,说明在迭代字典时iterator执行了某些非法操作. */
long long dictFingerprint(dict *d) {
    long long integers[6], hash = 0;
    int j;
    integers[0] = (long) d->ht[0].table;
    integers[1] = d->ht[0].size;
    integers[2] = d->ht[0].used;
    integers[3] = (long) d->ht[1].table;
    integers[4] = d->ht[1].size;
    integers[5] = d->ht[1].used;
    /* We hash N integers by summing every successive integer with the integer
    * hashing of the previous sum. Basically:
    *
    * Result = hash(hash(hash(int1)+int2)+int3) ...
    *
    * This way the same set of integers in a different order will (likely) hash
    * to a different number. */
    for (j = 0; j < 6; j++) {
        hash += integers[j];
        /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
        hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
        hash = hash ^ (hash >> 24);
        hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
        hash = hash ^ (hash >> 14);
        hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
        hash = hash ^ (hash >> 28);
        hash = hash + (hash << 31);
    }
    return hash;
}
dictIterator *dictGetIterator(dict *d)           // 初始化一个迭代器
{
    dictIterator *iter = zmalloc(sizeof(*iter));
    iter->d = d;
    iter->table = 0;
    iter->index = -1;
    iter->safe = 0;
    iter->entry = NULL;
    iter->nextEntry = NULL;
    return iter;
}
dictIterator *dictGetSafeIterator(dict *d) {    // 初始化一个安全迭代器
    dictIterator *i = dictGetIterator(d);
    i->safe = 1;
    return i;
}
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) {                      //首次迭代，初始化entry
            dictht *ht = &iter->d->ht[iter->table];
            if (iter->index == -1 && iter->table == 0) {
                if (iter->safe)
                    iter->d->iterators++;
                else
                    iter->fingerprint = dictFingerprint(iter->d);
            }
            iter->index++;
            if (iter->index >= (signed) ht->size) {
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                    ht = &iter->d->ht[1];
                } else {
                    break;
                }
            }
            iter->entry = ht->table[iter->index];
        } else {
            iter->entry = iter->nextEntry;
        }
        if (iter->entry) {
        /* We need to save the 'next' here, the iterator user
         * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL;
}
void dictReleaseIterator(dictIterator *iter)
{
    if (!(iter->index == -1 && iter->table == 0)) {
        if (iter->safe)
            iter->d->iterators--;                                 // 特指safe迭代器
        else
            assert(iter->fingerprint == dictFingerprint(iter->d));
    }
    zfree(iter);                 // 释放迭代器空间
}
/* 从hashtable中随机返回元素. 此处需要使用随机函数. */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *he, *orighe;
    unsigned int h;
    int listlen, listele;
    if (dictSize(d) == 0) return NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);
    if (dictIsRehashing(d)) {
        do {
            h = random() % (d->ht[0].size+d->ht[1].size);
            he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
            d->ht[0].table[h];
        } while(he == NULL);
    } else {
        do {
            h = random() & d->ht[0].sizemask;
            he = d->ht[0].table[h];
        } while(he == NULL);
    }
    /* 现在找到一个指向链表的bucket, 然后需要从链表中随机选取元素.
    * 最明智的做法是统计元素个数并随机选择index值. */
    listlen = 0;
    orighe = he;
    while(he) {
        he = he->next;
        listlen++;
    }
    listele = random() % listlen;
    he = orighe;
    while(listele--) he = he->next;
    return he;
}
/* 用于位翻转. 算法参考自:
* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
static unsigned long rev(unsigned long v) {
    unsigned long s = 8 * sizeof(v); // bit size; must be power of 2
    unsigned long mask = ~0;
    while ((s >>= 1) > 0) {
        mask ^= (mask << s);
        v = ((v >> s) & mask) | ((v << s) & ~mask);
    }
    return v;
}
/* dictScan() 用于遍历字典元素.
*
* 具体如何遍历及解决双table的问题，另外会再做讨论说明
*/
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata)
{
    dictht *t0, *t1;
    const dictEntry *de;
    unsigned long m0, m1;
    if (dictSize(d) == 0) return 0;
    if (!dictIsRehashing(d)) {
        t0 = &(d->ht[0]);
        m0 = t0->sizemask;
        /* Emit entries at cursor */
        de = t0->table[v & m0];
        while (de) {
            fn(privdata, de);
            de = de->next;
        }
    } else {
        t0 = &d->ht[0];
        t1 = &d->ht[1];
        /* 保证t0为小表, t1为大表 */
        if (t0->size > t1->size) {
            t0 = &d->ht[1];
            t1 = &d->ht[0];
        }
        m0 = t0->sizemask;
        m1 = t1->sizemask;
        /* Emit entries at cursor */
        de = t0->table[v & m0];
        while (de) {
            fn(privdata, de);
            de = de->next;
        }
/* Iterate over indices in larger table that are the expansion
* of the index pointed to by the cursor in the smaller table */
    do {
        /* Emit entries at cursor */
        de = t1->table[v & m1];
        while (de) {
            fn(privdata, de);
            de = de->next;
        }
        /* Increment bits not covered by the smaller mask */
        v = (((v | m0) + 1) & ~m0) | (v & m0);
        /* Continue while bits covered by mask difference is non-zero */
        } while (v & (m0 ^ m1));
    }
/* Set unmasked bits so incrementing the reversed cursor
* operates on the masked bits of the smaller table */
    v |= ~m0;
    /* Increment the reverse cursor */
    v = rev(v);
    v++;
    v = rev(v);
    return v;
}
/* ------------------------- 私有方法 ------------------------------ */
/* 判断是否需要拓展hashtable空间 */
static int _dictExpandIfNeeded(dict *d)
{
    /* 如果正在进行rehashing，则直接返回. */
    if (dictIsRehashing(d)) return DICT_OK;
    /* 如果hashtable为空则将其拓展至初始大小. */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
/* 当 (已存储的节点数>=buckect数) && (允许resize || 节点数/bukect数>阈值) 时，进行字典扩展. */
    if (d->ht[0].used >= d->ht[0].size && (dict_can_resize || d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
    {
        return dictExpand(d, d->ht[0].used*2);
    }
    return DICT_OK;
}
/* 在到达空间上限前, 字典空间是翻倍扩展的*/
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;
    if (size >= LONG_MAX) return LONG_MAX;
    while(1) {
        if (i >= size)
            return i;
        i *= 2;
    }
}
/* 返回给定新key的对应空闲的索引地址.
* 如果key值已存在，则返回-1.
*
* 如果dict正处于rehasing状态, 函数总会返回ht[1]中的索引地址. */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int h, idx, table;
    dictEntry *he;
    /* 拓展hashtable空间 */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    /* 计算key对应的hash value */
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        /* 遍历查找该索引地址指向的list是否包含给定key，存在则返回-1 */
        he = d->ht[table].table[idx];
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return -1;
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}
/* 清空字典数据但不释放空间 */
void dictEmpty(dict *d, void(callback)(void*)) {    
    _dictClear(d,&d->ht[0],callback);
    _dictClear(d,&d->ht[1],callback);
    d->rehashidx = -1;
    d->iterators = 0;
}

/* 启用空间缩小功能 */
void dictEnableResize(void) {
    dict_can_resize = 1;
}

/* 禁用空间缩小功能 */
void dictDisableResize(void) {
    dict_can_resize = 0;
}

下一节将会重点图解字典的原理(重点在rehash)。

【上篇】【python】理解迭代器及yield
【下篇】【python】zip()方法的一种应用场景

作者: moniker

该日志由 moniker 于7年前发表在综合分类下，最后更新于 2017年10月27日.
转载请注明: 【Redis】初探dict字典原理的实现(一) | 学步园 +复制链接

抱歉!评论已关闭.

学步园

【Redis】初探dict字典原理的实现(一)

作者: moniker

书签

最新文章New

本站推荐

返回首页