现在的位置: 首页 > 综合 > 正文

网上找到的各个编码之间的转换

2012年10月31日 ⁄ 综合 ⁄ 共 4388字 ⁄ 字号 评论关闭

package com.trs.infra.util;

import java.io.IOException;

/**
 * Hand-rolled conversions between Java strings (UTF-16 code units) and
 * byte-oriented encodings: UTF-8 and raw 16-bit code units.
 *
 * <p>All methods are static and keep no state.
 */
public class CharacterConvertor {

    /** Message used for every malformed-UTF-8 failure. */
    private static final String INVALID_UTF8 = "Invalid UTF-8 format";

    /**
     * No-op entry point; the class is used only through its static helpers.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Intentionally empty.
    }

    /**
     * Decodes UTF-8 data that is carried in a string, one byte per
     * {@code char}.
     *
     * <p>Only the low 8 bits of every input {@code char} are used. One- to
     * four-byte sequences are supported; code points above U+FFFF are written
     * to the result as a surrogate pair. Over-long encodings are not rejected.
     *
     * @param instr string whose chars each hold one UTF-8 byte
     * @return the decoded text
     * @throws IOException if a lead byte is not a valid UTF-8 lead byte, a
     *         continuation byte is missing or malformed, or the decoded value
     *         is above U+10FFFF
     * @throws NullPointerException if {@code instr} is {@code null}
     */
    public static String convertUTF8String2Unicode(String instr)
            throws IOException {
        int length = instr.length();
        StringBuilder decoded = new StringBuilder(length);

        int pos = 0;
        while (pos < length) {
            int lead = instr.charAt(pos++) & 0xff;

            int codePoint;
            int continuationCount;
            if ((lead & 0x80) == 0) {
                // 0xxxxxxx: plain ASCII
                codePoint = lead;
                continuationCount = 0;
            } else if ((lead & 0xE0) == 0xC0) {
                // 110xxxxx: 5 payload bits
                codePoint = lead & 0x1F;
                continuationCount = 1;
            } else if ((lead & 0xF0) == 0xE0) {
                // 1110xxxx: 4 payload bits
                codePoint = lead & 0x0F;
                continuationCount = 2;
            } else if ((lead & 0xF8) == 0xF0) {
                // 11110xxx: only 3 payload bits; a wider mask would leak the
                // marker bit into the code point.
                codePoint = lead & 0x07;
                continuationCount = 3;
            } else {
                // Stray continuation byte (10xxxxxx) or 0xF8..0xFF.
                throw new IOException(INVALID_UTF8);
            }

            for (int k = 0; k < continuationCount; k++) {
                if (pos >= length) {
                    // Sequence is cut off at the end of the input.
                    throw new IOException(INVALID_UTF8);
                }
                int next = instr.charAt(pos++) & 0xff;
                if ((next & 0xC0) != 0x80) {
                    throw new IOException(INVALID_UTF8);
                }
                codePoint = (codePoint << 6) | (next & 0x3F);
            }

            if (codePoint > Character.MAX_CODE_POINT) {
                throw new IOException(INVALID_UTF8);
            }
            // appendCodePoint emits a surrogate pair for values above U+FFFF
            // instead of truncating them to 16 bits.
            decoded.appendCodePoint(codePoint);
        }

        return decoded.toString();
    }

    /**
     * Encodes a string as UTF-8.
     *
     * <p>The string is walked by code point, so a valid surrogate pair becomes
     * one four-byte sequence. An unpaired surrogate is encoded as a three-byte
     * sequence of its own 16-bit value.
     *
     * @param instr text to encode
     * @return a new array holding exactly the encoded bytes
     * @throws NullPointerException if {@code instr} is {@code null}
     */
    public static byte[] convertUnicode2UTF8Byte(String instr) {
        int len = instr.length();
        // Worst case is 3 bytes per char: a BMP char needs at most 3 bytes and
        // a surrogate pair (2 chars) needs 4.
        byte[] buffer = new byte[len * 3];
        int used = 0;

        int pos = 0;
        while (pos < len) {
            int cp = instr.codePointAt(pos);
            pos += Character.charCount(cp);

            if (cp < 0x80) {
                buffer[used++] = (byte) cp;
            } else if (cp < 0x0800) {
                buffer[used++] = (byte) (((cp >> 6) & 0x1F) | 0xC0);
                buffer[used++] = (byte) ((cp & 0x3F) | 0x80);
            } else if (cp < 0x010000) {
                buffer[used++] = (byte) (((cp >> 12) & 0x0F) | 0xE0);
                buffer[used++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
                buffer[used++] = (byte) ((cp & 0x3F) | 0x80);
            } else {
                // Four-byte form; the lead marker is 11110xxx (0xF0).
                buffer[used++] = (byte) (((cp >> 18) & 0x07) | 0xF0);
                buffer[used++] = (byte) (((cp >> 12) & 0x3F) | 0x80);
                buffer[used++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
                buffer[used++] = (byte) ((cp & 0x3F) | 0x80);
            }
        }

        byte[] result = new byte[used];
        System.arraycopy(buffer, 0, result, 0, used);
        return result;
    }

    /**
     * Builds a string from 16-bit code units stored as byte pairs, high byte
     * first.
     *
     * <p>If {@code myByte} is {@code null} or has an odd length, the method
     * prints {@code Encoding Error} to standard output and returns an empty
     * string.
     *
     * @param myByte byte pairs, each pair being (high byte, low byte)
     * @return the string formed from the code units, or {@code ""} on bad input
     */
    public static String ISO106462Unicode(byte[] myByte) {
        if (myByte == null || (myByte.length & 1) != 0) {
            System.out.println("Encoding Error");
            return "";
        }

        StringBuilder sb = new StringBuilder(myByte.length >> 1);
        for (int i = 0; i < myByte.length; i += 2) {
            // Mask to undo sign extension of the signed byte values.
            int high = myByte[i] & 0xff;
            int low = myByte[i + 1] & 0xff;
            sb.append((char) ((high << 8) | low));
        }
        return sb.toString();
    }

    /**
     * Serializes every {@code char} of a string as two bytes, low byte first.
     *
     * <p>NOTE(review): this byte order is the opposite of the one read by
     * {@link #ISO106462Unicode(byte[])}, so the two methods are not inverses
     * of each other. Confirm with callers whether that is intended.
     *
     * @param s text to serialize
     * @return an array of {@code 2 * s.length()} bytes
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static byte[] Unicode2Byte(String s) {
        int len = s.length();
        byte[] out = new byte[len << 1];
        int j = 0;
        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            out[j++] = (byte) (c & 0xff);
            out[j++] = (byte) (c >> 8);
        }
        return out;
    }

}

抱歉!评论已关闭.