现在的位置: 首页 > 综合 > 正文

【转载】C#获取文件编码

2012年12月02日 ⁄ 综合 ⁄ 共 38048字 ⁄ 字号 评论关闭

转载一段C#获取文件编码的代码,在此感谢代码提供者的无私奉献!

/// <summary>
    /// 检测字符编码的类
    /// <seealso cref="System.IO.Stream"/>
    /// <seealso cref="System.Uri"/>
    /// <seealso cref="System.IO.FileInfo"/>
    /// </summary>
    /// <remarks>
    /// <![CDATA[
    /// <strong>FileEncoder</strong> 用来检测 <see cref="Uri"/>,<see cref="System.IO.FileInfo"/>,<see cref="sbyte"/> 字节数组的编码.
    /// Create By lion  <br/>
    /// 2005-02-21 22:00  <br/>
    /// Support .Net Framework v1.1.4322 <br/>
    /// WebSite:www.lionsky.net(lion-a AT sohu.com) <br/>
    /// ]]>
    /// </remarks>
    public class FileEncoder
    {
        #region Fields.....

        // Frequency tables to hold the GB, GBK, Big5, and EUC-TW character
        // frequencies. Only the outer (row) arrays are allocated here;
        // Initialize_Frequencies allocates the inner arrays of GBFreq and fills
        // in its weights (the other tables are presumably set up the same way
        // further down that method -- confirm there).
        internal static int[][] GBFreq = new int[94][];
        internal static int[][] GBKFreq = new int[126][];
        internal static int[][] Big5Freq = new int[94][];
        internal static int[][] EUC_TWFreq = new int[94][];

        // Names returned by GetEncodingName. The position of each name matches
        // the score slot filled in GetEncodingName(sbyte[]); the last entry
        // ("OTHER") is returned when no score exceeds 50.
        internal static string[] nicename = new string[]
   {
    "GB2312", "GBK", "HZ", "Big5", "CNS 11643"
    , "ISO 2022CN", "UTF-8", "Unicode", "ASCII", "OTHER"
   };

        #endregion

        #region Methods.....

        /// <summary>
        /// Initializes a new instance of <see cref="FileEncoder"/> and makes sure
        /// the character frequency tables are populated.
        /// </summary>
        public FileEncoder()
        {
            // The tables are static; Initialize_Frequencies checks GBFreq[0] before
            // allocating, so the GBFreq setup is skipped once it has been done.
            Initialize_Frequencies();
        }

        #region GetEncodingName.....

        /// <summary>
        /// 从指定的 <see cref="Uri"/> 中判断编码类型
        /// </summary>
        /// <param name="testurl">要判断的 <see cref="Uri"/> </param>
        /// <returns>返回编码类型("GB2312", "GBK", "HZ", "Big5", "CNS 11643", "ISO 2022CN", "UTF-8", "Unicode", "ASCII", "OTHER")</returns>
        /// <example>
        /// 以下示例演示了如何调用 <see cref="GetEncodingName"/> 方法:
        /// <code>
        ///  IdentifyEncoding ide = new IdentifyEncoding();
        ///  Response.Write(ide.GetEncodingName(new Uri("http://china5.nikkeibp.co.jp/china/news/com/200307/pr_com200307170131.html"))); 
        /// </code>
        /// </example>
        public virtual string GetEncodingName(System.Uri testurl)
        {
            sbyte[] rawtext = new sbyte[1024];
            int bytesread = 0, byteoffset = 0;
            System.IO.Stream chinesestream;
            try
            {
                chinesestream = System.Net.WebRequest.Create(testurl.AbsoluteUri).GetResponse().GetResponseStream();
                while ((bytesread = ReadInput(chinesestream, ref rawtext, byteoffset, rawtext.Length - byteoffset)) > 0)
                {
                    byteoffset += bytesread;
                }
                chinesestream.Close();
            }
            catch (System.Exception e)
            {
                System.Console.Error.WriteLine("Error loading or using URL " + e.ToString());
            }
            return GetEncodingName(rawtext);
        }

        /// <summary>
        /// 从指定的 <see cref="System.IO.FileInfo"/> 中判断编码类型
        /// </summary>
        /// <param name="testfile">要判断的 <see cref="System.IO.FileInfo"/> </param>
        /// <returns>返回编码类型("GB2312", "GBK", "HZ", "Big5", "CNS 11643", "ISO 2022CN", "UTF-8", "Unicode", "ASCII", "OTHER")</returns>
        /// <example>
        /// 以下示例演示了如何调用 <see cref="GetEncodingName"/> 方法:
        /// <code>
        ///  IdentifyEncoding ide = new IdentifyEncoding();
        ///  Response.Write(ide.GetEncodingName(new System.IO.FileInfo(@"C:\test.txt"))); 
        /// </code>
        /// </example>
        public virtual string GetEncodingName(System.IO.FileInfo testfile)
        {
            System.IO.FileStream chinesefile;
            sbyte[] rawtext;
            rawtext = new sbyte[(int)FileLength(testfile)];
            try
            {
                chinesefile = new System.IO.FileStream(testfile.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
                ReadInput(chinesefile, ref rawtext, 0, rawtext.Length);
            }
            catch (System.Exception e)
            {
                System.Console.Error.WriteLine("Error: " + e);
            }

            return GetEncodingName(rawtext);
        }

        /// <summary>
        /// 从指定的 <see cref="sbyte"/> 字节数组中判断编码类型
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="System.IO.FileInfo"/> </param>
        /// <returns>返回编码类型("GB2312", "GBK", "HZ", "Big5", "CNS 11643", "ISO 2022CN", "UTF-8", "Unicode", "ASCII", "OTHER")</returns>
        /// <example>
        /// 以下示例演示了如何调用 <see cref="GetEncodingName"/> 方法:
        /// <code>
        ///  IdentifyEncoding ide = new IdentifyEncoding();
        ///  Response.Write(ide.GetEncodingName(IdentifyEncoding.ToSByteArray(System.Text.Encoding.GetEncoding("gb2312").GetBytes("Lion互动网络(www.lionsky.net)"))));
        /// </code>
        /// </example>
        public virtual string GetEncodingName(sbyte[] rawtext)
        {
            int[] scores;
            int index, maxscore = 0;
            int encoding_guess = 0;

            scores = new int[10];
            //分析编码的概率
            scores[0] = GB2312Probability(rawtext);
            scores[1] = GBKProbability(rawtext);
            scores[2] = HZProbability(rawtext);
            scores[3] = BIG5Probability(rawtext);
            scores[4] = ENCTWProbability(rawtext);
            scores[5] = ISO2022CNProbability(rawtext);
            scores[6] = UTF8Probability(rawtext);
            scores[7] = UnicodeProbability(rawtext);
            scores[8] = ASCIIProbability(rawtext);
            scores[9] = 0;

            // Tabulate Scores
            for (index = 0; index < 10; index++)
            {
                if (scores[index] > maxscore)
                {
                    encoding_guess = index;
                    maxscore = scores[index];
                }
            }

            // Return OTHER if nothing scored above 50
            if (maxscore <= 50)
            {
                encoding_guess = 9;
            }

            return nicename[encoding_guess];
        }

        #endregion

        #region About Probability.....

        #region GB2312Probability

        /// <summary>
        /// 判断是GB2312编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int GB2312Probability(sbyte[] rawtext)
        {
            int i, rawtextlen = 0;

            int dbchars = 1, gbchars = 1;
            long gbfreq = 0, totalfreq = 1;
            float rangeval = 0, freqval = 0;
            int row, column;

            // Stage 1:  Check to see if characters fit into acceptable ranges

            rawtextlen = rawtext.Length;
            for (i = 0; i < rawtextlen - 1; i++)
            {
                if (rawtext[i] >= 0)
                {
                    //asciichars++;
                }
                else
                {
                    dbchars++;
                    if ((sbyte)Identity(0xA1) <= rawtext[i] && rawtext[i] <= (sbyte)Identity(0xF7) && (sbyte)Identity(0xA1) <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)Identity(0xFE))
                    {
                        gbchars++;
                        totalfreq += 500;
                        row = rawtext[i] + 256 - 0xA1;
                        column = rawtext[i + 1] + 256 - 0xA1;
                        if (GBFreq[row][column] != 0)
                        {
                            gbfreq += GBFreq[row][column];
                        }
                        else if (15 <= row && row < 55)
                        {
                            gbfreq += 200;
                        }
                    }
                    i++;
                }
            }

            rangeval = 50 * ((float)gbchars / (float)dbchars);
            freqval = 50 * ((float)gbfreq / (float)totalfreq);

            return (int)(rangeval + freqval);
        }

        #endregion

        #region GBKProbability.....

        /// <summary>
        /// 判断是GBK编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int GBKProbability(sbyte[] rawtext)
        {
            int i, rawtextlen = 0;

            int dbchars = 1, gbchars = 1;
            long gbfreq = 0, totalfreq = 1;
            float rangeval = 0, freqval = 0;
            int row, column;

            // Stage 1:  Check to see if characters fit into acceptable ranges
            rawtextlen = rawtext.Length;
            for (i = 0; i < rawtextlen - 1; i++)
            {
                if (rawtext[i] >= 0)
                {
                    //asciichars++;
                }
                else
                {
                    dbchars++;
                    if ((sbyte)Identity(0xA1) <= rawtext[i] && rawtext[i] <= (sbyte)Identity(0xF7) && (sbyte)Identity(0xA1) <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)Identity(0xFE))
                    {
                        gbchars++;
                        totalfreq += 500;
                        row = rawtext[i] + 256 - 0xA1;
                        column = rawtext[i + 1] + 256 - 0xA1;

                        if (GBFreq[row][column] != 0)
                        {
                            gbfreq += GBFreq[row][column];
                        }
                        else if (15 <= row && row < 55)
                        {
                            gbfreq += 200;
                        }
                    }
                    else if ((sbyte)Identity(0x81) <= rawtext[i] && rawtext[i] <= (sbyte)Identity(0xFE) && (((sbyte)Identity(0x80) <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)Identity(0xFE)) || ((sbyte)0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)0x7E)))
                    {
                        gbchars++;
                        totalfreq += 500;
                        row = rawtext[i] + 256 - 0x81;
                        if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E)
                        {
                            column = rawtext[i + 1] - 0x40;
                        }
                        else
                        {
                            column = rawtext[i + 1] + 256 - 0x80;
                        }

                        if (GBKFreq[row][column] != 0)
                        {
                            gbfreq += GBKFreq[row][column];
                        }
                    }
                    i++;
                }
            }

            rangeval = 50 * ((float)gbchars / (float)dbchars);
            freqval = 50 * ((float)gbfreq / (float)totalfreq);

            return (int)(rangeval + freqval) - 1;
        }

        #endregion

        #region HZProbability.....

        /// <summary>
        /// 判断是HZ编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int HZProbability(sbyte[] rawtext)
        {
            int i, rawtextlen;
            int hzchars = 0, dbchars = 1;
            long hzfreq = 0, totalfreq = 1;
            float rangeval = 0, freqval = 0;
            int hzstart = 0, hzend = 0;
            int row, column;

            rawtextlen = rawtext.Length;

            for (i = 0; i < rawtextlen; i++)
            {
                if (rawtext[i] == '~')
                {
                    if (rawtext[i + 1] == '{')
                    {
                        hzstart++;
                        i += 2;
                        while (i < rawtextlen - 1)
                        {
                            if (rawtext[i] == 0x0A || rawtext[i] == 0x0D)
                            {
                                break;
                            }
                            else if (rawtext[i] == '~' && rawtext[i + 1] == '}')
                            {
                                hzend++;
                                i++;
                                break;
                            }
                            else if ((0x21 <= rawtext[i] && rawtext[i] <= 0x77) && (0x21 <= rawtext[i + 1] && rawtext[i + 1] <= 0x77))
                            {
                                hzchars += 2;
                                row = rawtext[i] - 0x21;
                                column = rawtext[i + 1] - 0x21;
                                totalfreq += 500;
                                if (GBFreq[row][column] != 0)
                                {
                                    hzfreq += GBFreq[row][column];
                                }
                                else if (15 <= row && row < 55)
                                {
                                    hzfreq += 200;
                                }
                            }
                            else if (((byte)0xA1 <= rawtext[i] && rawtext[i] <= (byte)0xF7) && ((byte)0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte)0xF7))
                            {
                                hzchars += 2;
                                row = rawtext[i] + 256 - 0xA1;
                                column = rawtext[i + 1] + 256 - 0xA1;
                                totalfreq += 500;
                                if (GBFreq[row][column] != 0)
                                {
                                    hzfreq += GBFreq[row][column];
                                }
                                else if (15 <= row && row < 55)
                                {
                                    hzfreq += 200;
                                }
                            }
                            dbchars += 2;
                            i += 2;
                        }
                    }
                    else if (rawtext[i + 1] == '}')
                    {
                        hzend++;
                        i++;
                    }
                    else if (rawtext[i + 1] == '~')
                    {
                        i++;
                    }
                }
            }

            if (hzstart > 4)
            {
                rangeval = 50;
            }
            else if (hzstart > 1)
            {
                rangeval = 41;
            }
            else if (hzstart > 0)
            {
                // Only 39 in case the sequence happened to occur
                rangeval = 39; // in otherwise non-Hz text
            }
            else
            {
                rangeval = 0;
            }
            freqval = 50 * ((float)hzfreq / (float)totalfreq);

            return (int)(rangeval + freqval);
        }

        #endregion

        #region BIG5Probability.....

        /// <summary>
        /// 判断是BIG5编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int BIG5Probability(sbyte[] rawtext)
        {
            int i, rawtextlen = 0;
            int dbchars = 1, bfchars = 1;
            float rangeval = 0, freqval = 0;
            long bffreq = 0, totalfreq = 1;
            int row, column;

            // Check to see if characters fit into acceptable ranges

            rawtextlen = rawtext.Length;
            for (i = 0; i < rawtextlen - 1; i++)
            {
                if (rawtext[i] >= 0)
                {
                    //asciichars++;
                }
                else
                {
                    dbchars++;
                    if ((sbyte)Identity(0xA1) <= rawtext[i] && rawtext[i] <= (sbyte)Identity(0xF9) && (((sbyte)0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)0x7E) || ((sbyte)Identity(0xA1) <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)Identity(0xFE))))
                    {
                        bfchars++;
                        totalfreq += 500;
                        row = rawtext[i] + 256 - 0xA1;
                        if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E)
                        {
                            column = rawtext[i + 1] - 0x40;
                        }
                        else
                        {
                            column = rawtext[i + 1] + 256 - 0x61;
                        }
                        if (Big5Freq[row][column] != 0)
                        {
                            bffreq += Big5Freq[row][column];
                        }
                        else if (3 <= row && row <= 37)
                        {
                            bffreq += 200;
                        }
                    }
                    i++;
                }
            }

            rangeval = 50 * ((float)bfchars / (float)dbchars);
            freqval = 50 * ((float)bffreq / (float)totalfreq);

            return (int)(rangeval + freqval);
        }

        #endregion

        #region ENCTWProbability.....

        /// <summary>
        /// 判断是CNS11643(台湾)编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int ENCTWProbability(sbyte[] rawtext)
        {
            int i, rawtextlen = 0;
            int dbchars = 1, cnschars = 1;
            long cnsfreq = 0, totalfreq = 1;
            float rangeval = 0, freqval = 0;
            int row, column;

            // Check to see if characters fit into acceptable ranges
            // and have expected frequency of use

            rawtextlen = rawtext.Length;
            for (i = 0; i < rawtextlen - 1; i++)
            {
                if (rawtext[i] >= 0)
                {
                    // in ASCII range
                    //asciichars++;
                }
                else
                {
                    // high bit set
                    dbchars++;
                    if (i + 3 < rawtextlen && (sbyte)Identity(0x8E) == rawtext[i] && (sbyte)Identity(0xA1) <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)Identity(0xB0) && (sbyte)Identity(0xA1) <= rawtext[i + 2] && rawtext[i + 2] <= (sbyte)Identity(0xFE) && (sbyte)Identity(0xA1) <= rawtext[i + 3] && rawtext[i + 3] <= (sbyte)Identity(0xFE))
                    {
                        // Planes 1 - 16

                        cnschars++;
                        // These are all less frequent chars so just ignore freq
                        i += 3;
                    }
                    else if ((sbyte)Identity(0xA1) <= rawtext[i] && rawtext[i] <= (sbyte)Identity(0xFE) && (sbyte)Identity(0xA1) <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)Identity(0xFE))
                    {
                        cnschars++;
                        totalfreq += 500;
                        row = rawtext[i] + 256 - 0xA1;
                        column = rawtext[i + 1] + 256 - 0xA1;
                        if (EUC_TWFreq[row][column] != 0)
                        {
                            cnsfreq += EUC_TWFreq[row][column];
                        }
                        else if (35 <= row && row <= 92)
                        {
                            cnsfreq += 150;
                        }
                        i++;
                    }
                }
            }

            rangeval = 50 * ((float)cnschars / (float)dbchars);
            freqval = 50 * ((float)cnsfreq / (float)totalfreq);

            return (int)(rangeval + freqval);
        }

        #endregion

        #region ISO2022CNProbability.....

        /// <summary>
        /// 判断是ISO2022CN编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int ISO2022CNProbability(sbyte[] rawtext)
        {
            int i, rawtextlen = 0;
            int dbchars = 1, isochars = 1;
            long isofreq = 0, totalfreq = 1;
            float rangeval = 0, freqval = 0;
            int row, column;

            // Check to see if characters fit into acceptable ranges
            // and have expected frequency of use

            rawtextlen = rawtext.Length;
            for (i = 0; i < rawtextlen - 1; i++)
            {
                if (rawtext[i] == (sbyte)0x1B && i + 3 < rawtextlen)
                {
                    // Escape char ESC
                    if (rawtext[i + 1] == (sbyte)0x24 && rawtext[i + 2] == 0x29 && rawtext[i + 3] == (sbyte)0x41)
                    {
                        // GB Escape  $ ) A
                        i += 4;
                        while (rawtext[i] != (sbyte)0x1B)
                        {
                            dbchars++;
                            if ((0x21 <= rawtext[i] && rawtext[i] <= 0x77) && (0x21 <= rawtext[i + 1] && rawtext[i + 1] <= 0x77))
                            {
                                isochars++;
                                row = rawtext[i] - 0x21;
                                column = rawtext[i + 1] - 0x21;
                                totalfreq += 500;
                                if (GBFreq[row][column] != 0)
                                {
                                    isofreq += GBFreq[row][column];
                                }
                                else if (15 <= row && row < 55)
                                {
                                    isofreq += 200;
                                }
                                i++;
                            }
                            i++;
                        }
                    }
                    else if (i + 3 < rawtextlen && rawtext[i + 1] == (sbyte)0x24 && rawtext[i + 2] == (sbyte)0x29 && rawtext[i + 3] == (sbyte)0x47)
                    {
                        // CNS Escape $ ) G
                        i += 4;
                        while (rawtext[i] != (sbyte)0x1B)
                        {
                            dbchars++;
                            if ((sbyte)0x21 <= rawtext[i] && rawtext[i] <= (sbyte)0x7E && (sbyte)0x21 <= rawtext[i + 1] && rawtext[i + 1] <= (sbyte)0x7E)
                            {
                                isochars++;
                                totalfreq += 500;
                                row = rawtext[i] - 0x21;
                                column = rawtext[i + 1] - 0x21;
                                if (EUC_TWFreq[row][column] != 0)
                                {
                                    isofreq += EUC_TWFreq[row][column];
                                }
                                else if (35 <= row && row <= 92)
                                {
                                    isofreq += 150;
                                }
                                i++;
                            }
                            i++;
                        }
                    }
                    if (rawtext[i] == (sbyte)0x1B && i + 2 < rawtextlen && rawtext[i + 1] == (sbyte)0x28 && rawtext[i + 2] == (sbyte)0x42)
                    {
                        // ASCII:  ESC ( B
                        i += 2;
                    }
                }
            }

            rangeval = 50 * ((float)isochars / (float)dbchars);
            freqval = 50 * ((float)isofreq / (float)totalfreq);

            return (int)(rangeval + freqval);
        }

        #endregion

        #region UTF8Probability.....

        /// <summary>
        /// 判断是UTF8编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int UTF8Probability(sbyte[] rawtext)
        {
            int score = 0;
            int i, rawtextlen = 0;
            int goodbytes = 0, asciibytes = 0;

            // Maybe also use UTF8 Byte Order Mark:  EF BB BF

            // Check to see if characters fit into acceptable ranges
            rawtextlen = rawtext.Length;
            for (i = 0; i < rawtextlen; i++)
            {
                if ((rawtext[i] & (sbyte)0x7F) == rawtext[i])
                {
                    // One byte
                    asciibytes++;
                    // Ignore ASCII, can throw off count
                }
                else if (-64 <= rawtext[i] && rawtext[i] <= -33 && i + 1 < rawtextlen && -128 <= rawtext[i + 1] && rawtext[i + 1] <= -65)
                {
                    goodbytes += 2;
                    i++;
                }
                else if (-32 <= rawtext[i] && rawtext[i] <= -17 && i + 2 < rawtextlen && -128 <= rawtext[i + 1] && rawtext[i + 1] <= -65 && -128 <= rawtext[i + 2] && rawtext[i + 2] <= -65)
                {
                    goodbytes += 3;
                    i += 2;
                }
            }

            if (asciibytes == rawtextlen)
            {
                return 0;
            }

            score = (int)(100 * ((float)goodbytes / (float)(rawtextlen - asciibytes)));

            // If not above 98, reduce to zero to prevent coincidental matches
            // Allows for some (few) bad formed sequences
            if (score > 98)
            {
                return score;
            }
            else if (score > 95 && goodbytes > 30)
            {
                return score;
            }
            else
            {
                return 0;
            }
        }

        #endregion

        #region UnicodeProbability.....

        /// <summary>
        /// 判断是Unicode编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int UnicodeProbability(sbyte[] rawtext)
        {
            //int score = 0;
            //int i, rawtextlen = 0;
            //int goodbytes = 0, asciibytes = 0;

            if (((sbyte)Identity(0xFE) == rawtext[0] && (sbyte)Identity(0xFF) == rawtext[1]) || ((sbyte)Identity(0xFF) == rawtext[0] && (sbyte)Identity(0xFE) == rawtext[1]))
            {
                return 100;
            }

            return 0;
        }

        #endregion

        #region ASCIIProbability.....

        /// <summary>
        /// 判断是ASCII编码的可能性
        /// </summary>
        /// <param name="rawtext">要判断的 <see cref="sbyte"/> 字节数组</param>
        /// <returns>返回 0 至 100 之间的可能性</returns>
        internal virtual int ASCIIProbability(sbyte[] rawtext)
        {
            int score = 70;
            int i, rawtextlen;

            rawtextlen = rawtext.Length;

            for (i = 0; i < rawtextlen; i++)
            {
                if (rawtext[i] < 0)
                {
                    score = score - 5;
                }
                else if (rawtext[i] == (sbyte)0x1B)
                {
                    // ESC (used by ISO 2022)
                    score = score - 5;
                }
            }

            return score;
        }

        #endregion

        #endregion

        #region Initialize_Frequencies.....

        /// <summary>
        /// 初始化必要的条件
        /// </summary>
        internal virtual void Initialize_Frequencies()
        {
            int i;
            if (GBFreq[0] == null)
            {
                for (i = 0; i < 94; i++)
                {
                    GBFreq[i] = new int[94];
                }

                #region GBFreq[20][35] = 599;

                GBFreq[49][26] = 598;
                GBFreq[41][38] = 597;
                GBFreq[17][26] = 596;
                GBFreq[32][42] = 595;
                GBFreq[39][42] = 594;
                GBFreq[45][49] = 593;
                GBFreq[51][57] = 592;
                GBFreq[50][47] = 591;
                GBFreq[42][90] = 590;
                GBFreq[52][65] = 589;
                GBFreq[53][47] = 588;
                GBFreq[19][82] = 587;
                GBFreq[31][19] = 586;
                GBFreq[40][46] = 585;
                GBFreq[24][89] = 584;
                GBFreq[23][85] = 583;
                GBFreq[20][28] = 582;
                GBFreq[42][20] = 581;
                GBFreq[34][38] = 580;
                GBFreq[45][9] = 579;
                GBFreq[54][50] = 578;
                GBFreq[25][44] = 577;
                GBFreq[35][66] = 576;
                GBFreq[20][55] = 575;
                GBFreq[18][85] = 574;
                GBFreq[20][31] = 573;
                GBFreq[49][17] = 572;
                GBFreq[41][16] = 571;
                GBFreq[35][73] = 570;
                GBFreq[20][34] = 569;
                GBFreq[29][44] = 568;
                GBFreq[35][38] = 567;
                GBFreq[49][9] = 566;
                GBFreq[46][33] = 565;
                GBFreq[49][51] = 564;
                GBFreq[40][89] = 563;
                GBFreq[26][64] = 562;
                GBFreq[54][51] = 561;
                GBFreq[54][36] = 560;
                GBFreq[39][4] = 559;
                GBFreq[53][13] = 558;
                GBFreq[24][92] = 557;
                GBFreq[27][49] = 556;
                GBFreq[48][6] = 555;
                GBFreq[21][51] = 554;
                GBFreq[30][40] = 553;
                GBFreq[42][92] = 552;
                GBFreq[31][78] = 551;
                GBFreq[25][82] = 550;
                GBFreq[47][0] = 549;
                GBFreq[34][19] = 548;
                GBFreq[47][35] = 547;
                GBFreq[21][63] = 546;
                GBFreq[43][75] = 545;
                GBFreq[21][87] = 544;
                GBFreq[35][59] = 543;
                GBFreq[25][34] = 542;
                GBFreq[21][27] = 541;
                GBFreq[39][26] = 540;
                GBFreq[34][26] = 539;
                GBFreq[39][52] = 538;
                GBFreq[50][57] = 537;
                GBFreq[37][79] = 536;
                GBFreq[26][24] = 535;
                GBFreq[22][1] = 534;
                GBFreq[18][40] = 533;
                GBFreq[41][33] = 532;
                GBFreq[53][26] = 531;
                GBFreq[54][86] = 530;
                GBFreq[20][16] = 529;
                GBFreq[46][74] = 528;
                GBFreq[30][19] = 527;
                GBFreq[45][35] = 526;
                GBFreq[45][61] = 525;
                GBFreq[30][9] = 524;
                GBFreq[41][53] = 523;
                GBFreq[41][13] = 522;
                GBFreq[50][34] = 521;
                GBFreq[53][86] = 520;
                GBFreq[47][47] = 519;
                GBFreq[22][28] = 518;
                GBFreq[50][53] = 517;
                GBFreq[39][70] = 516;
                GBFreq[38][15] = 515;
                GBFreq[42][88] = 514;
                GBFreq[16][29] = 513;
                GBFreq[27][90] = 512;
                GBFreq[29][12] = 511;
                GBFreq[44][22] = 510;
                GBFreq[34][69] = 509;
                GBFreq[24][10] = 508;
                GBFreq[44][11] = 507;
                GBFreq[39][92] = 506;
                GBFreq[49][48] = 505;
                GBFreq[31][46] = 504;
                GBFreq[19][50] = 503;
                GBFreq[21][14] = 502;
                GBFreq[32][28] = 501;
                GBFreq[18][3] = 500;
                GBFreq[53][9] = 499;
                GBFreq[34][80] = 498;
                GBFreq[48][88] = 497;
                GBFreq[46][53] = 496;
                GBFreq[22][53] = 495;
                GBFreq[28][10] = 494;
                GBFreq[44][65] = 493;
                GBFreq[20][10] = 492;
                GBFreq[40][76] = 491;
                GBFreq[47][8] = 490;
                GBFreq[50][74] = 489;
                GBFreq[23][62] = 488;
                GBFreq[49][65] = 487;
                GBFreq[28][87] = 486;
                GBFreq[15][48] = 485;
                GBFreq[22][7] = 484;
                GBFreq[19][42] = 483;
                GBFreq[41][20] = 482;
                GBFreq[26][55] = 481;
                GBFreq[21][93] = 480;
                GBFreq[31][76] = 479;
                GBFreq[34][31] = 478;
                GBFreq[20][66] = 477;
                GBFreq[51][33] = 476;
                GBFreq[34][86] = 475;
                GBFreq[37][67] = 474;
                GBFreq[53][53] = 473;
                GBFreq[40][88] = 472;
                GBFreq[39][10] = 471;
                GBFreq[24][3] = 470;
                GBFreq[27][25] = 469;
                GBFreq[26][15] = 468;
                GBFreq[21][88] = 467;
                GBFreq[52][62] = 466;
                GBFreq[46][81] = 465;
                GBFreq[38][72] = 464;
                GBFreq[17][30] = 463;
                GBFreq[52][92] = 462;
                GBFreq[34][90] = 461;
                GBFreq[21][7] = 460;
                GBFreq[36][13] = 459;
                GBFreq[45][41] = 458;
                GBFreq[32][5] = 457;
                GBFreq[26][89] = 456;
                GBFreq[23][87] = 455;
                GBFreq[20][39] = 454;
                GBFreq[27][23] = 453;
                GBFreq[25][59] = 452;
                GBFreq[49][20] = 451;
                GBFreq[54][77] = 450;
                GBFreq[27][67] = 449;
                GBFreq[47][33] = 448;
                GBFreq[41][17] = 447;
                GBFreq[19][81] = 446;
                GBFreq[16][66] = 445;
                GBFreq[45][26] = 444;
                GBFreq[49][81] = 443;
                GBFreq[53][55] = 442;
                GBFreq[16][26] = 441;
                GBFreq[54][62] = 440;
                GBFreq[20][70] = 439;
                GBFreq[42][35] = 438;
                GBFreq[20][57] = 437;
                GBFreq[34][36] = 436;
                GBFreq[46][63] = 435;
                GBFreq[19][45] = 434;
                GBFreq[21][10] = 433;
                GBFreq[52][93] = 432;
                GBFreq[25][2] = 431;
                GBFreq[30][57] = 430;
                GBFreq[41][24] = 429;
                GBFreq[28][43] = 428;
                GBFreq[45][86] = 427;
                GBFreq[51][56] = 426;
                GBFreq[37][28] = 425;
                GBFreq[52][69] = 424;
                GBFreq[43][92] = 423;
                GBFreq[41][31] = 422;
                GBFreq[37][87] = 421;
                GBFreq[47][36] = 420;
                GBFreq[16][16] = 419;
                GBFreq[40][56] = 418;
                GBFreq[24][55] = 417;
                GBFreq[17][1] = 416;
                GBFreq[35][57] = 415;
                GBFreq[27][50] = 414;
                GBFreq[26][14] = 413;
                GBFreq[50][40] = 412;
                GBFreq[39][19] = 411;
                GBFreq[19][89] = 410;
                GBFreq[29][91] = 409;
                GBFreq[17][89] = 408;
                GBFreq[39][74] = 407;
                GBFreq[46][39] = 406;
                GBFreq[40][28] = 405;
                GBFreq[45][68] = 404;
                GBFreq[43][10] = 403;
                GBFreq[42][13] = 402;
                GBFreq[44][81] = 401;
                GBFreq[41][47] = 400;
                GBFreq[48][58] = 399;
                GBFreq[43][68] = 398;
                GBFreq[16][79] = 397;
                GBFreq[19][5] = 396;
                GBFreq[54][59] = 395;
                GBFreq[17][36] = 394;
                GBFreq[18][0] = 393;
                GBFreq[41][5] = 392;
                GBFreq[41][72] = 391;
                GBFreq[16][39] = 390;
                GBFreq[54][0] = 389;
                GBFreq[51][16] = 388;
                GBFreq[29][36] = 387;
                GBFreq[47][5] = 386;
                GBFreq[47][51] = 385;
                GBFreq[44][7] = 384;
                GBFreq[35][30] = 383;
                GBFreq[26][9] = 382;
                GBFreq[16][7] = 381;
                GBFreq[32][1] = 380;
                GBFreq[33][76] = 379;
                GBFreq[34][91] = 378;
                GBFreq[52][36] = 377;
                GBFreq[26][77] = 376;
                GBFreq[35][48] = 375;
                GBFreq[40][80] = 374;
                GBFreq[41][92] = 373;
                GBFreq[27][93] = 372;
                GBFreq[15][17] = 371;
                GBFreq[16][76] = 370;
                GBFreq[51][12] = 369;
                GBFreq[18][20] = 368;
                GBFreq[15][54] = 367;
                GBFreq[50][5] = 366;
                GBFreq[33][22] = 365;
                GBFreq[37][57] = 364;
                GBFreq[28][47] = 363;
                GBFreq[42][31] = 362;
                GBFreq[18][2] = 361;
                GBFreq[43][64] = 360;
                GBFreq[23][47] = 359;
                GBFreq[28][79] = 358;
                GBFreq[25][45] = 357;
                GBFreq[23][91] = 356;
                GBFreq[22][19] = 355;
                GBFreq[25][46] = 354;
                GBFreq[22][36] = 353;
                GBFreq[54][85] = 352;
                GBFreq[46][20] = 351;
                GBFreq[27][37] = 350;
                GBFreq[26][81] = 349;
                GBFreq[42][29] = 348;
                GBFreq[31][90] = 347;
                GBFreq[41][59] = 346;
                GBFreq[24][65] = 345;
                GBFreq[44][84] = 344;
                GBFreq[24][90] = 343;
                GBFreq[38][54] = 342;
                GBFreq[28][70] = 341;
                GBFreq[27][15] = 340;
                GBFreq[28][80] = 339;
                GBFreq[29][8] = 338;
                GBFreq[45][80] = 337;
                GBFreq[53][37] = 336;
                GBFreq[28][65] = 335;
                GBFreq[23][86] = 334;
                GBFreq[39][45] = 333;
                GBFreq[53][32] = 332;
                GBFreq[38][68] = 331;
                GBFreq[45][78] = 330;
                GBFreq[43][7] = 329;
                GBFreq[46][82] = 328;
                GBFreq[27][38] = 327;
                GBFreq[16][62] = 326;
                GBFreq[24][17] = 325;
                GBFreq[22][70] = 324;
                GBFreq[52][28] = 323;
                GBFreq[23][40] = 322;
                GBFreq[28][50] = 321;
                GBFreq[42][91] = 320;
                GBFreq[47][76] = 319;
                GBFreq[15][42] = 318;
                GBFreq[43][55] = 317;
                GBFreq[29][84] = 316;
                GBFreq[44][90] = 315;
                GBFreq[53][16] = 314;
                GBFreq[22][93] = 313;
                GBFreq[34][10] = 312;
                GBFreq[32][53] = 311;
                GBFreq[43][65] = 310;
                GBFreq[28][7] = 309;
                GBFreq[35][46] = 308;
                GBFreq[21][39] = 307;
                GBFreq[44][18] = 306;
                GBFreq[40][10] = 305;
                GBFreq[54][53] = 304;
                GBFreq[38][74] = 303;
                GBFreq[28][26] = 302;
                GBFreq[15][13] = 301;
                GBFreq[39][34] = 300;
                GBFreq[39][46] = 299;
                GBFreq[42][66] = 298;
                GBFreq[33][58] = 297;
                GBFreq[15][56] = 296;
                GBFreq[18][51] = 295;
                GBFreq[49][68] = 294;
                GBFreq[30][37] = 293;
                GBFreq[51][84] = 292;
                GBFreq[51][9] = 291;
                GBFreq[40][70] = 290;
                GBFreq[41][84] = 289;
                GBFreq[28][64] = 288;
                GBFreq[32][88] = 287;
                GBFreq[24][5] = 286;
                GBFreq[53][23] = 285;
                GBFreq[42][27] = 284;
                GBFreq[22][38] = 283;
                GBFreq[32][86] = 282;
                GBFreq[34][30] = 281;
                GBFreq[38][63] = 280;
                GBFreq[24][59] = 279;
                GBFreq[22][81] = 278;
                GBFreq[32][11] = 277;
                GBFreq[51][21] = 276;
                GBFreq[54][41] = 275;
                GBFreq[21][50] = 274;
                GBFreq[23][89] = 273;
                GBFreq[19][87] = 272;
                GBFreq[26][7] = 271;
                GBFreq[30][75] = 270;
                GBFreq[43][84] = 269;
                GBFreq[51][25] = 268;
                GBFreq[16][67] = 267;
                GBFreq[32][9] = 266;
                GBFreq[48][51] = 265;
                GBFreq[39][7] = 264;
                GBFreq[44][88] = 263;
                GBFreq[52][24] = 262;
                GBFreq[23][34] = 261;
                GBFreq[32][75] = 260;
                GBFreq[19][10] = 259;
                GBFreq[28][91] = 258;
                GBFreq[32][83] = 257;
                GBFreq[25][75] = 256;
                GBFreq[53][45] = 255;
                GBFreq[29][85] = 254;
                GBFreq[53][59] = 253;
                GBFreq[16][2] = 252;
                GBFreq[19][78] = 251;
                GBFreq[15][75] = 250;
                GBFreq[51][42] = 249;
                GBFreq[45][67] = 248;
                GBFreq[15][74] = 247;
                GBFreq[25][81] = 246;
                GBFreq[37][62] = 245;
                GBFreq[16][55] = 244;
                GBFreq[18][38] = 243;
                GBFreq[23][23] = 242;

                GBFreq[38][30] = 241;
                GBFreq[17][28] = 240;
                GBFreq[44][73] = 239;
                GBFreq[23][78] = 238;
                GBFreq[40][77] = 237;
                GBFreq[38][87] = 236;
                GBFreq[27][19] = 235;
                GBFreq[38][82] = 234;
                GBFreq[37][22] = 233;
                GBFreq[41][30] = 232;
                GBFreq[54][9] = 231;
                GBFreq[32][30] = 230;
                GBFreq[30][52] = 229;
                GBFreq[40][84] = 228;
                GBFreq[53][57] = 227;
                GBFreq[27][27] = 226;
                GBFreq[38][64] = 225;
                GBFreq[18][43] = 224;
                GBFreq[23][69] = 223;
                GBFreq[28][12] = 222;
                GBFreq[50][78] = 221;
                GBFreq[50][1] = 220;
                GBFreq[26][88] = 219;
                GBFreq[36][40] = 218;
                GBFreq[33][89] = 217;
                GBFreq[41][28] = 216;
                GBFreq[31][77] = 215;
                GBFreq[46][1] = 214;
                GBFreq[47][19] = 213;
                GBFreq[35][55] = 212;
                GBFreq[41][21] = 211;
                GBFreq[27][10] = 210;
                GBFreq[32][77] = 209;
                GBFreq[26][37] = 208;
                GBFreq[20][33] = 207;
                GBFreq[41][52] = 206;
                GBFreq[32][18] = 205;
                GBFreq[38][13] = 204;
                GBFreq[20][18] = 203;
                GBFreq[20][24] = 202;
                GBFreq[45][19] = 201;
                GBFreq[18][53] = 200;

                #endregion
            }

            if (GBKFreq[0] == null)
            {
                for (i = 0; i < 126; i++)
                {
                    GBKFreq[i] = new int[191];
                }

                #region GBKFreq[52][132] = 600;

                GBKFreq[73][135] = 599;
                GBKFreq[49][123] = 598;
                GBKFreq[77][146] = 597;
                GBKFreq[81][123] = 596;
                GBKFreq[82][144] = 595;
     

抱歉!评论已关闭.