现在的位置: 首页 > 综合 > 正文

字符串编码传输

2012年06月15日 ⁄ 综合 ⁄ 共 5288字 ⁄ 字号 评论关闭

在robotter项目(我们为日本客户开发的一个web应用项目)的开发中,遇到了字符串编码问题。robotter项目采用了微软silverlight beta版,中文和日文在web服务器与silverlight客户端之间传输时,有时会出现乱码。即使是经过URL编码或者Base64编码的内容,也同样会产生乱码。于是只好自己写一个算法,为项目组解决这个问题。

基本思想,采用64进制来表示数据。这是根据16进制的特点来设计的,因为16进制的表示都是两个英文字母或数字的组合,传输中不会乱码。现在,如果存在足够的数字和英文字母,使得64进制可以被表示,那么所有的数据都可以用这个进制来表示。

动手从ascii码表中选择足够多的字母和数字,组成64进制(也就是2的6次方)的符号表。ascii码表中不会乱码的字母和数字不足128个,因此无法达到2的7次方,64进制已经是可用的最大进制。

// Encoding dictionary: the 64 symbols used as digits of the base-64 representation.
// A symbol's index in this string (0-63) is the 6-bit value it stands for.

public static readonly string codeDictionary = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{|";

6位二进制可以表示64个不同的值,也就是说,这个字典可以表示从0到2的6次方减1(即0到63)范围内的任何数。

一、根据上面的理论,每个字节的数据可以被编码成两个字节。

               

每个字节的低6位取出来,以它的值为位置,取出编码字典中的64进制表示符号,而高2位再次编码,这样,一个字节被编码成编码表中数字和英文字母的符号组合。

在实现过程中第二个版本采用了这个编码方法。public static string EncodString2(string rawString)函数和public static string DecodString2(string codingString)函数。

二、在实际应用中发现这种编码有点长。于是改进算法:

                                               

按照6位一个编码的思想,把3个字节的原始数据编码成4个字节的64进制代码符号。按照这个思想实现了编码解码的第三个算法版本。public static string EncodString3(string rawString)函数和public static string DecodString3(string codingString)函数。

经过测试,这种编码的长度不足url编码的一半。

附上源代码:

/// <summary>
    /// 用来编码和解码非ascii字符的字符串
    /// </summary>
    public class RoboterCoder
    {
        // Encoding dictionary: the 64 symbols used as digits of the base-64 representation.
        // A symbol's index in this string (0-63) is the 6-bit value it stands for.
        public static readonly string codeDictionary = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{|";

        /// <summary>
        /// 在字典中查找代码位置
        /// </summary>
        private static int GetCodePosition(char code)
        {
            int temp = Convert.ToInt32(code);
            if (temp < 65)
                return (temp - 48);
            if (temp >= 65 && temp <= 90)
            {
                return (temp - 55);
            }
            if (temp >= 97 && temp <= 124)
            {
                return (temp - 61);
            }
            throw new RoboterException("RoboterCoder类GetCodePosition方法发生异常", "不正确的码值");
        }

        /// <summary>
        /// 编码字符串
        /// </summary>
        public static string EncodString3(string rawString)
        {
            if (rawString == string.Empty)
                return string.Empty;
            if (rawString == null)
                return null;

            byte[] buffer = Encoding.UTF8.GetBytes(rawString);
            StringBuilder sb = new StringBuilder();
            int j = 0;
            byte oldData = 0;
            byte data = 0;
            for (int i = 0; i < buffer.Length;i++)
            {
                data = buffer[i];
                int c = 0;
                switch (j)
                {
                    case 0:
                        c = (byte)(data << 2);
                        c = c >> 2;
                        oldData = (byte)(data >> 6);
                        j++;
                        sb.Append(codeDictionary[c]);
                        break;
                    case 1:
                        c = (byte)(data << 4);
                        c = c >> 2;
                        c = c | oldData;
                        oldData = (byte)(data >> 4);
                        j++;
                        sb.Append(codeDictionary[c]);
                        break;
                    default:
                        c = (byte)(data << 6);
                        c = c >> 2;
                        c = c | oldData;
                        oldData = (byte)(data >> 2);
                        j=0;
                        sb.Append(codeDictionary[c]);
                        c = oldData;
                        sb.Append(codeDictionary[c]);
                        break;
                }
            }
            switch (j)
            {
                case 1:
                    sb.Append(codeDictionary[oldData]);
                    break;
                case 2:
                    sb.Append(codeDictionary[oldData]);
                    break;
                default:
                    break;
            }
            return sb.ToString();
        }

        /// <summary>
        /// 解码字符串
        /// </summary>
        public static string DecodString3(string codingString)
        {
            if (codingString == string.Empty)
                return string.Empty;
            if (codingString == null)
                return null;

            int length = codingString.Length;
            int bitLength = (length / 4) * 3;
            int remain = (length % 4) - 1;
            if (remain < 0)
                remain = 0;
            length = bitLength + remain;
            byte[] buffer = new byte[length];
            int j = 0;
            byte data = 0;
            byte oldData = 0;
            int pos = 0;

            for (int i = 0; i < codingString.Length;i++)
            {
                char c = codingString[i];
                int index = GetCodePosition(c);
                data = (byte)index;

                switch (j)
                {
                    case 0:
                        oldData = data;
                        j++;
                        break;
                    case 1:
                        oldData = (byte)(oldData | (byte)(data << 6));
                        buffer[pos] = oldData;
                        oldData = (byte)(data >> 2);
                        pos++;
                        j++;
                        break;
                    case 2:
                        oldData = (byte)(oldData | (byte)(data << 4));
                        buffer[pos] = oldData;
                        oldData = (byte)(data >> 4);
                        pos++;
                        j++;
                        break;
                    default:
                        oldData = (byte)(oldData | (byte)(data << 2));
                        buffer[pos] = oldData;
                        pos++;
                        j=0;
                        break;
                }
            }

            string result = Encoding.UTF8.GetString(buffer, 0, buffer.Length);
            return result;
        }

抱歉!评论已关闭.