一步一步解析Dictionary实现原理

一步一步解析Dictionary实现原理

  本文对C#中Dictionary的内部实现原理进行简单的解析。若有表述错误,欢迎指正。

  主要对照源码进行解析,目前对照的源码版本是 .NET Framework 4.8,源码地址

1. 关键的字段和Entry结构

        // One slot of the entries array: a key/value pair plus the bookkeeping
        // needed to chain slots that share a bucket.
        struct Entry
        {
            public int hashCode;    // The key's hash code masked with 0x7FFFFFFF (non-negative); Remove sets it to -1 to mark a freed slot
            public int next;            // Index in entries of the next slot in the same chain; -1 marks the end of the chain
            public TKey key;
            public TValue value;
        }
        Entry[] entries;        // Slots holding the key/value pairs
        int[] buckets;          // buckets[hashCode % buckets.Length] is the index in entries of the head of that bucket's chain (the most recently inserted slot), or -1 when the bucket is empty. Example: a pair stored in entries[1] whose hashCode % Length is 2 gives buckets[2] = 1
        int count = 0;         // Number of slots of entries handed out so far; Remove does not decrease it, so freed slots are still counted
        int version;             // Incremented on every insert, overwrite and remove; presumably checked by enumerators to detect modification during iteration (enumerator not shown here)
        IEqualityComparer<TKey> comparer;    // Supplies GetHashCode/Equals for keys; named to match its use in Insert, FindEntry, Remove and Resize
        int freeList;             // Index in entries of the most recently freed slot (head of the free-slot chain linked through Entry.next)
        int freeCount;         // Number of freed slots currently on the free-slot chain

2. 添加键值(Add)

        /// <summary>
        /// Adds a key/value pair. Delegates to Insert with add = true, so an
        /// already-present key is reported as a duplicate instead of being overwritten.
        /// </summary>
        public void Add(TKey key, TValue value)
        {
            Insert(key, value, add: true);
        }


        /// <summary>
        /// Stores <paramref name="value"/> under <paramref name="key"/>.
        /// When the key is already present: with <paramref name="add"/> = true a
        /// duplicate-key ArgumentException is raised through ThrowHelper, otherwise the
        /// existing value is overwritten. When the key is absent a slot is taken from
        /// the free-slot chain if one exists, else from the end of entries (resizing
        /// first when entries is full), and the slot becomes the new head of its bucket chain.
        /// </summary>
        /// <param name="key">Key to store; null is rejected via ThrowHelper.</param>
        /// <param name="value">Value to associate with the key.</param>
        /// <param name="add">true for Add semantics (duplicates are an error), false for indexer-set semantics (overwrite).</param>
        private void Insert(TKey key, TValue value, bool add) {
        
            if( key == null ) {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
            }
            // Lazily create the arrays on first insert (Initialize is not shown in this excerpt).
            if (buckets == null) Initialize(0);
            // Mask off the sign bit so the hash code is non-negative.
            int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
            // Modulo picks the bucket.
            int targetBucket = hashCode % buckets.Length;
#if FEATURE_RANDOMIZED_STRING_HASHING
            int collisionCount = 0;
#endif
            // Walk the bucket's chain looking for an existing slot with this key.
            for (int i = buckets[targetBucket]; i >= 0; i = entries[i].next) {
                if (entries[i].hashCode == hashCode &&  comparer.Equals(entries[i].key, key)) {
                    if (add) {
                         ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
                    }
                    // Key already exists: overwrite its value.
                    entries[i].value = value;
                    version++;
                    return;
                }
#if FEATURE_RANDOMIZED_STRING_HASHING
                collisionCount++;
#endif
            }
            int index;
            if (freeCount > 0) {
                // A freed slot is available: reuse the head of the free-slot chain.
                index = freeList;
                freeList = entries[index].next;
                freeCount--;
            }
            else {
                if (count == entries.Length)
                {
                    // entries is full: grow. (Resize() without arguments is not shown here; per the
                    // accompanying article it picks the smallest prime greater than count * 2
                    // as the new length of both entries and buckets.)
                    Resize();
                    // buckets.Length changed, so the target bucket must be recomputed.
                    targetBucket = hashCode % buckets.Length;
                }
                index = count;
                count++;
            }
            entries[index].hashCode = hashCode;
            // Link the new slot in front of the bucket's current chain head.
            entries[index].next = buckets[targetBucket];
            entries[index].key = key;
            entries[index].value = value;
            // The bucket now records the new slot as the chain head (the slot most recently
            // stored for this bucket); lookups such as FindEntry(TKey key) start from this head.
            buckets[targetBucket] = index;
            version++;
#if FEATURE_RANDOMIZED_STRING_HASHING
#if FEATURE_CORECLR
            // In case we hit the collision threshold we'll need to switch to the  comparer which is using randomized string hashing
            // in this case will be EqualityComparer<string>.Default.
            // Note, randomized string hashing is turned on by default on coreclr so  EqualityComparer<string>.Default will
            // be using randomized string hashing
            if (collisionCount > HashHelpers.HashCollisionThreshold && comparer ==  NonRandomizedStringEqualityComparer.Default)
            {
                comparer = (IEqualityComparer<TKey>)  EqualityComparer<string>.Default;
                Resize(entries.Length, true);
            }
#else
            if(collisionCount > HashHelpers.HashCollisionThreshold &&  HashHelpers.IsWellKnownEqualityComparer(comparer))
            {
                // The chain walked above was longer than the collision threshold: switch to a
                // randomized comparer and rebuild the table at its current size with
                // recomputed hash codes (the capacity does not change here).
                comparer = (IEqualityComparer<TKey>)  HashHelpers.GetRandomizedEqualityComparer(comparer);
                Resize(entries.Length, true);
            }
#endif // FEATURE_CORECLR
#endif
        }

******************************************************************************************************************************************
        // Demo driver: fills a Dictionary<int, int> in two batches through the Add helper.
        static void Foo()
        {
            Dictionary<int, int> dicData = new Dictionary<int, int>();

            // First batch of keys.
            foreach (int key in new[] { 1, 2, 4 })
            {
                Add(key, dicData);
            }

            // Second batch of keys.
            foreach (int key in new[] { 22, 29, 36, 20 })
            {
                Add(key, dicData);
            }
        }
        // Demo helper: stores the key as its own value in the given dictionary.
        static void Add(int key, Dictionary<int, int> dicData) => dicData.Add(key, key);

 

2.1 数组entries和buckets初始化

 

 

 

 2.2 添加键值{1,1},则

    hashCode = 1; targetBucket = hashCode % buckets.Length;        //targetBucket = 1
    next = buckets[targetBucket];                               //next = -1
    buckets[targetBucket] = index;                             //buckets[1] = 0 

 

 

 2.3 添加键值{2,2},则

    hashCode = 2; targetBucket = hashCode % buckets.Length;        //targetBucket = 2
    next = buckets[targetBucket];                               //next = -1
    buckets[targetBucket] = index;                              //buckets[2] = 1

 

 

 2.4 添加键值{4,4},则

    hashCode = 4; targetBucket = hashCode % buckets.Length;        //targetBucket = 1
    next = buckets[targetBucket];                               //next = 0
    buckets[targetBucket] = index;                              //buckets[1] = 2

 

接下来将entries数组以单链表的形式展示(即将entries数组横向排列);

 2.5 在继续添加键值之前,需要先进行扩容操作,因为entries数组长度为3且都已有元素。扩容后需要对buckets以及entries中每一个元素的next重新赋值;

            /// <summary>
            /// Rebuilds buckets and entries with length <paramref name="newSize"/>.
            /// The first <c>count</c> slots are copied over in place, every bucket is reset
            /// to -1, and each live slot is re-linked into the bucket selected by
            /// hashCode % newSize.
            /// </summary>
            /// <param name="newSize">New length of both arrays; asserted to be at least entries.Length.</param>
            /// <param name="forceNewHashCodes">
            /// When true, the stored hash code of every non-freed slot is recomputed with the
            /// current comparer before re-linking (used after the comparer has been swapped).
            /// </param>
            private void Resize(int newSize, bool forceNewHashCodes)
            {
                Contract.Assert(newSize >= entries.Length);

                // Fresh bucket table with every bucket marked empty (-1).
                int[] rebuiltBuckets = new int[newSize];
                for (int b = 0; b < rebuiltBuckets.Length; b++)
                {
                    rebuiltBuckets[b] = -1;
                }

                // Slots keep their indices; only the chain links are rebuilt below.
                Entry[] rebuiltEntries = new Entry[newSize];
                Array.Copy(entries, 0, rebuiltEntries, 0, count);

                if (forceNewHashCodes)
                {
                    for (int slot = 0; slot < count; slot++)
                    {
                        // -1 marks a freed slot; it has no key to rehash.
                        if (rebuiltEntries[slot].hashCode == -1)
                        {
                            continue;
                        }
                        rebuiltEntries[slot].hashCode = comparer.GetHashCode(rebuiltEntries[slot].key) & 0x7FFFFFFF;
                    }
                }

                // Re-link every live slot at the head of its new bucket's chain.
                for (int slot = 0; slot < count; slot++)
                {
                    int storedHash = rebuiltEntries[slot].hashCode;
                    if (storedHash < 0)
                    {
                        continue;
                    }
                    int target = storedHash % newSize;
                    rebuiltEntries[slot].next = rebuiltBuckets[target];
                    rebuiltBuckets[target] = slot;
                }

                buckets = rebuiltBuckets;
                entries = rebuiltEntries;
            }

 2.6 继续添加键值{22,22},{29,29},{36,36},{20,20},添加完后其内部存储结果如下

 3. 取Key值(dic[22])

     /// <summary>
     /// Gets or sets the value stored under <paramref name="key"/>.
     /// The getter locates the slot with FindEntry and reports a missing key through
     /// ThrowHelper.ThrowKeyNotFoundException; the setter calls Insert with add = false,
     /// so an existing key is overwritten and a new key is added.
     /// </summary>
     public TValue this[TKey key]
     {
         get
         {
             // Index of the key's slot in entries, or -1 when the key is absent.
             int slot = FindEntry(key);
             if (slot < 0)
             {
                 ThrowHelper.ThrowKeyNotFoundException();
                 return default(TValue);
             }
             return entries[slot].value;
         }
         set
         {
             Insert(key, value, false);
         }
     }

    /// <summary>
    /// Returns the index in entries of the slot holding <paramref name="key"/>,
    /// or -1 when the key is not present (including when no buckets exist yet).
    /// A null key is rejected through ThrowHelper.
    /// </summary>
    private int FindEntry(TKey key)
    {
        if (key == null)
        {
            ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
        }

        if (buckets == null)
        {
            return -1;
        }

        int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;

        // Start at the bucket's chain head and follow the next links until -1.
        int current = buckets[hashCode % buckets.Length];
        while (current >= 0)
        {
            // Compare the cached hash first; only call Equals on a hash match.
            if (entries[current].hashCode == hashCode && comparer.Equals(entries[current].key, key))
            {
                return current;
            }
            current = entries[current].next;
        }

        return -1;
    }
*********************************************************************************************
        // Demo fragment (earlier setup elided): reads a value back through the indexer.
        static void Foo()
        {
            ......
            // Look up the value stored under key 22
            var val =dicData[22];

}

简化取Key对应值的代码

    // Condensed form of the lookup performed by FindEntry for key 22.
    var hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;   // 22
    var targetBucket = hashCode % buckets.Length;           // modulo result: 1
    var i = buckets[targetBucket];                          // index of the chain head: buckets[1] = 5
    // Walk the chain until the key is found or the end (-1) is reached
    for (; i >= 0; i =  entries[i].next) {
        if (entries[i].hashCode == hashCode &&  comparer.Equals(entries[i].key, key)) return i;
    }

 4. 移除键值(Remove)

        /// <summary>
        /// Removes the pair stored under <paramref name="key"/>.
        /// The slot is unlinked from its bucket chain, cleared, marked with hashCode = -1
        /// and pushed onto the free-slot chain so that a later insert can reuse it.
        /// </summary>
        /// <param name="key">Key to remove; null is rejected through ThrowHelper.</param>
        /// <returns>true when the key was found and removed; otherwise false.</returns>
        public bool Remove(TKey key)
        {
            if (key == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
            }

            if (buckets == null)
            {
                return false;
            }

            int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
            int slotBucket = hashCode % buckets.Length;

            // previous trails current along the chain; -1 means current is the chain head.
            int previous = -1;
            int current = buckets[slotBucket];
            while (current >= 0)
            {
                if (entries[current].hashCode == hashCode && comparer.Equals(entries[current].key, key))
                {
                    // Unlink: either the bucket or the predecessor now points past the removed slot.
                    if (previous >= 0)
                    {
                        entries[previous].next = entries[current].next;
                    }
                    else
                    {
                        buckets[slotBucket] = entries[current].next;
                    }

                    // Mark the slot as freed and chain it in front of the existing free slots.
                    entries[current].hashCode = -1;
                    entries[current].next = freeList;
                    entries[current].key = default(TKey);
                    entries[current].value = default(TValue);

                    // Remember the freed slot so the next insert fills it instead of
                    // consuming a new slot at the end of entries.
                    freeList = current;
                    freeCount++;
                    version++;
                    return true;
                }

                previous = current;
                current = entries[current].next;
            }

            return false;
        }
*******************************************************************
        // Demo fragment (earlier setup elided): removes two keys so that two slots of
        // entries end up on the free-slot chain.
        static void Foo()
        {
            ......
            // Remove key 22 and then key 36, matching the walkthrough that follows
            // (freeList = 3 after the first removal, freeList = 5 after the second)
            new List<int> { 22, 36 }.ForEach(item => dicData.Remove(item));
        } 

4.1 移除Key=22后,freeList = 3, freeCount = 1,

 4.2 移除Key=36后,freeList = 5, freeCount = 2, 

 

 

 5. 再插入键值

如上图,当移除{36,36}后,会发现又产生了一个含有两个元素的“新链表”(上图灰色框)。它的作用是在插入新键值时,按照“新链表”记录的索引顺序把新键值插入到entries数组中。 例:添加键值{22,22},{25,25},此时freeList = 5,freeCount = 2;

  1. 给entries[5]赋值,freeList = 3, freeCount = 1;
  2. 给entries[3]赋值,freeList = -1, freeCount = 0;

 

 希望此文可以让你对Dictionary的内部实现有所了解。

Up Next:

面试官:你有m个鸡蛋,如何用起码的次数测出鸡蛋会在哪一层碎?

面试官:你有m个鸡蛋,如何用起码的次数测出鸡蛋会在哪一层碎?