现在的位置: 首页 > 综合 > 正文

java在汉字字符串中查找拼音和将汉字转换为拼音

2014年01月30日 ⁄ 综合 ⁄ 共 12877字 ⁄ 字号 评论关闭

package com.rayclear.kupai.common;

import java.io.UnsupportedEncodingException;
import java.util.Locale;
import java.util.Vector;

public class SamPinyinHandler 
{
/**
 * Reports whether the pinyin spelling of one string contains the pinyin
 * spelling of the other.
 *
 * <p>Both arguments are converted with {@code stringToPinyin}, the resulting
 * tokens are concatenated, lower-cased and stripped of spaces, and the two
 * spellings are then compared with a substring test in both directions.
 * The original author's note: searching for several Chinese characters at
 * once is not supported yet.
 *
 * @param keyString   the text being searched for
 * @param valueString the text being searched in
 * @return {@code true} if either normalized spelling contains the other;
 *         {@code false} if an argument is {@code null} or empty, or if either
 *         argument produces an empty spelling
 */
public static boolean checkIfContain(String keyString,String valueString)
{
    if (keyString == null || valueString == null
            || keyString.length() < 1 || valueString.length() < 1)
    {
        return false;
    }

    String keyPinYinString = normalizePinyin(stringToPinyin(keyString));
    String valuePinYinString = normalizePinyin(stringToPinyin(valueString));

    // Both spellings must be non-empty: String.contains("") is always true,
    // so an empty key spelling (e.g. a key made only of digits) would
    // otherwise match every value.
    if (keyPinYinString.length() < 1 || valuePinYinString.length() < 1)
    {
        return false;
    }

    return valuePinYinString.contains(keyPinYinString)
            || keyPinYinString.contains(valuePinYinString);
}

/**
 * Concatenates pinyin tokens into one lower-case string without spaces.
 *
 * @param tokens the tokens to join, in order
 * @return the joined, normalized spelling (possibly empty)
 */
private static String normalizePinyin(Vector<String> tokens)
{
    StringBuilder joined = new StringBuilder();
    for (String token : tokens)
    {
        joined.append(token);
    }
    // Locale.ROOT keeps the case mapping independent of the device locale
    // (under a Turkish locale "I" would otherwise lower-case to a dotless i).
    // Spaces are removed because some table entries carry a stray space.
    return joined.toString().toLowerCase(Locale.ROOT).replace(" ", "");
}

/**
* 将一串字符转换为拼音非字母则转换为问号?
* @param strTextPass
* @return
*/
private static Vector<String> stringToPinyin(String strTextPass)
{
Vector<String> PingYing = new Vector<String>();
try {
short ucHigh, ucLow;
   int  nCode;
   String strValue=""; 
   byte[] strText = null;
try {
strText = strTextPass.getBytes("GBK");
} catch (UnsupportedEncodingException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}


   for (int i=0; i<strText.length; i++)
   {
    byte t = strText[i];
    strValue = "";
       while ( t < 0x80 && t >= 0)
{
       
if(Character.isLetter((char)t)) {
strValue=strValue+(char)strText[i];
       
}
       
else {
       
if(strValue.length() > 0)
       
PingYing.add(strValue);
       
strValue = "";
       
}
       
i++;
       
if(i<strText.length)
       
t = strText[i];
       
else
       
break;
}
       if(strValue.length() > 0)
       
PingYing.add(strValue);
       if(i >= strText.length-1)
       
continue;
       ucHigh = (short)(strText[i]+256);
       ucLow  = (short)(strText[i+1]+256);
       if ( ucHigh < 0xa1 || ucLow < 0xa1)
           continue;
       else
           nCode = (ucHigh - 0xa0) * 100 + ucLow - 0xa0;
String strRes=FindLetter(nCode);
PingYing.add(strRes);
i++;
   }
} catch (Exception e) {
}

   return PingYing;
}

/**
* map the code to spell letters
* @param nCode
* @return
*/
private static String FindLetter(int nCode)
{
String strValue = "";
switch(nCode)
{
case 6325:
case 6436:
case 7571:
case 7925:
strValue="A";
break;
case 6263:
case 6440:
case 7040:
case 7208:
case 7451:
case 7733:
case 7945:
case 8616:
strValue="AI";
break;
case 5847:
case 5991:
case 6278:
case 6577:
case 6654:
case 7281:
case 7907:
case 8038:
case 8786:
strValue="AN";
break;
case 5974:
case 6254:
case 6427:
case 6514:
case 6658:
case 6959:
case 7033:
case 7081:
case 7365:
case 8190:
case 8292:
case 8643:
case 8701:
case 8773:
strValue="AO";
break;
case 6056:
case 6135:
case 6517:
case 7857:
case 8446:
case 8649:
case 8741:
strValue="BA";
break;
case 6267:
case 6334:
case 7494:
strValue="BAI";
break;
case 5870:
case 5964:
case 7851:
case 8103:
case 8113:
case 8418:
strValue="BAN";
break;
case 6182:
case 6826:
strValue="BANG";
break;
case 6165:
case 7063:
case 7650:
case 8017:
case 8157:
case 8532:
case 8621:
strValue="BAO";
break;
case 5635:
case 5873:
case 5893:
case 5993:
case 6141:
case 6703:
case 7753:
case 8039:
case 8156:
case 8645:
case 8725:
strValue="BEI";
break;
case 5946:
case 5948:
case 7458:
case 7928:
strValue="BEN";
break;
case 6452:
case 7420:
strValue="BENG";
break;
case 5616:
case 5734:
case 6074:
case 6109:
case 6221:
case 6333:
case 6357:
case 6589:
case 6656:
case 6725:
case 6868:
case 6908:
case 6986:
case 6994:
case 7030:
case 7052:
case 7221:
case 7815:
case 7873:
case 7985:
case 8152:
case 8357:
case 8375:
case 8387:
case 8416:
case 8437:
case 8547:
case 8734:
strValue="BI";
break;
case 5650:
case 5945:
case 6048:
case 6677:
case 6774:
case 7134:
case 7614:
case 7652:
case 7730:
case 7760:
case 8125:
case 8159:
case 8289:
case 8354:
case 8693:
strValue="BIAN";
break;
case 7027:
case 7084:
case 7609:
case 7613:
case 7958:
case 7980:
case 8106:
case 8149:
case 8707:
case 8752:
strValue="BIAO";
break;
case 8531:
strValue="BIE";
break;
case 5747:
case 6557:
case 7145:
case 7167:
case 7336:
case 7375:
case 7587:
case 7957:
case 8738:
case 8762:
strValue="BIN";
break;
case 5787:
case 5891:
case 6280:
strValue="BING";
break;
case 5781:
case 6403:
case 6636:
case 7362:
case 7502:
case 7771:
case 7864:
case 8030:
case 8404:
case 8543:
case 8559:
strValue="BO";
break;
case 6318:
case 6945:
case 7419:
case 7446:
case 7848:
case 7863:
case 8519:
strValue="BU";
break;
case 6474:
case 7769:
strValue="CA";
break;
case 6978:
case 7078:
case 7218:
case 8451:
case 8785:
strValue="CAN";
break;
case 5687:
strValue="CANG";
break;
case 6448:
case 6878:
case 8309:
case 8429:
strValue="CAO";
break;
case 6692:
strValue="CE";
break;
case 6515:
case 6825:
strValue="CEN";
break;
case 6465:
strValue="CENG";
break;
case 6639:
case 6766:
case 7017:
case 7230:
case 7311:
case 7322:
case 7363:
case 7942:
case 7979:
case 8135:
strValue="CHA";
break;
case 5713:
case 7846:
case 8091:
case 8218:
strValue="CHAI";
break;
case 5770:
case 5838:
case 6159:
case 6667:
case 6893:
case 6904:
case 6981:
case 7031:
case 7086:
case 7472:
case 7688:
case 7966:
case 8324:
case 8580:
strValue="CHAN";
break;
case 5686:
case 5943:
case 6041:
case 6137:
case 6660:
case 6568:
case 6749:
case 7029:
case 7047:
case 7438:
case 7509:
case 8680:
strValue="CHANG";
break;
case 6687:
case 7443:
case 8173:
strValue="CHAO";
break;
case 5969:
case 7726:
strValue="CHE";
break;
case 5840:
case 5863:
case 6251:
case 6433:
case 6923:
case 7201:
case 7320:
case 7755:
case 8619:
strValue="CHEN";
break;
case 5609:
case 5984:
case 7239:
case 7263:
case 7583:
case 7810:
case 7881:
case 7905:
case 8146:
case 8241:
case 8508:
strValue="CHENG";
break;
case 5749:
case 6015:
case 6061:
case 6319:
case 6374:
case 6420:
case 6445:
case 6633:
case 7042:
case 7523:
case 7787:
case 8023:
case 8101:
case 8161:
case 8231:
case 8304:
case 8355:
case 8388:
case 8489:
case 8556:
case 8746:
strValue="CHI";
break;
case 6091:
case 6671:
case 6731:
case 8409:
case 8430:
strValue="CHONG";
break;
case 5717:
case 6492:
case 6716:
case 8112:
case 8637:
strValue="CHOU";
break;
case 5601:
case 5927:
case 6680:
case 6732:
case 7109:
case 7238:
case 7290:
case 7343:
case 8150:
case 8260:
case 8573:
case 8777:
strValue="CHU";
break;
case 6285:
case 6408:
case 7590:
case 8563:
strValue="CHUAI";
break;
case 6622:
case 6955:
case 7516:
case 7843:
case 8413:
strValue="CHUAN";
break;
case 6675:
strValue="CHUANG";
break;
case 5879:
case 7302:
case 7319:
strValue="CHUI";
break;
case 6127:
case 8040:
case 8277:
strValue="CHUN";
break;
case 7401:
case 8554:
case 8626:
strValue="CHUO";
break;
case 6075:
case 6358:
case 7684:
case 8043:
case 8457:
strValue="4337 ��";
break;
case 6042:
case 6840:
case 7085:
case 7193:
case 7214:
case 7240:
strValue="CONG";
break;
case 7308:
case 7403:
case 7577:
strValue="COU";
break;
case 6180:
case 6562:
case 6607:
case 7367:
case 8501:
case 8530:
case 8577:
strValue="CU";
break;
case 5764:
case 6305:
case 7664:
case 7973:
strValue="CUAN";
break;
case 6718:
case 6145:
case 6393:
case 7213:
case 7333:
case 7505:
case 8631:
strValue="CUI";
break;
case 6666:
case 8169:
strValue="CUN";
break;
case 5640:
case 6547:
case 7566:
case 7917:
case 7983:
case 8078:
case 8526:
case 8567:
strValue="CUO";
break;
case 6239:
case 6353:
case 6410:
case 6682:
case 7007:
case 8155:
case 8346:
case 8716:
case 8718:
strValue="DA";
break;
case 6004:
case 6316:
case 6523:
case 6942:
case 7110:
case 7173:
case 8776:
strValue="DAI";
break;
case 5757:
case 6144:
case 6402:
case 7373:
case 7470:
case 7781:
case 8067:
case 8087:
case 8185:
case 8376:
strValue="DAN";
break;
case 5852:
case 5942:
case 6148:
case 6920:
case 7724:
case 7885:
case 8141:
strValue="DANG";
break;
case 6322:
case 6665:
case 7514:
case 8478:
strValue="DAO";
break;
case 7929:
strValue="DE";
break;
case 6466:
case 6556:
case 7413:
case 7767:
case 7975:
case 8403:
strValue="DENG";
break;
case 5621:
case 5765:
case 5814:
case 5848:
case 5901:
case 5970:
case 6122:
case 6454:
case 7023:
case 7116:
case 7260:
case 7306:
case 7475:
case 7738:
case 7758:
case 7791:
case 7965:
case 8438:
case 8730:
strValue="DI";
break;
case 6439:
strValue="DIA";
break;
case 5871:
case 5967:
case 6559:
case 7172:
case 7868:
case 8116:
case 8118:
case 8401:
case 8558:
strValue="DIAN";
break;
case 7886:
case 8585:
case 8684:
strValue="DIAO";
break;
case 5976:
case 6006:
case 6273:
case 6409:
case 7526:
case 8012:
case 8183:
case 8562:
case 8688:
strValue="DIE";
break;
case 5674:
case 6404:
case 7164:
case 7575:
case 7754:
case 7814:
case 8059:
case 8184:
case 8490:
strValue="DING";
break;
case 7891:
strValue="DIU";
break;
case 5977:
case 6343:
case 6520:
case 6528:
case 7517:
case 7543:
case 7556:
case 7747:
case 8020:
strValue="DONG";
break;
case 6190:
case 8128:
case 8229:
case 8391:
strValue="DOU";
break;
case 6022:
case 6429:
case 6834:
case 7292:
case 7525:
case 8328:
case 8338:
case 8739:
case 8782:
strValue="DU";
break;
case 7318:
case 7649:
case 8393:
strValue="DUAN";
break;
case 7701:
case 7713:
case 7752:
strValue="DUI";
break;
case 6771:
case 7632:
case 7727:
case 7766:
case 7779:
case 7970:
case 8527:
strValue="DUN";
break;
case 6345:
case 6365:
case 6785:
case 7122:
case 7876:
case 8154:
case 8566:
strValue="DUO";
break;
case 5612:
case 5832:
case 5844:
case 5949:
case 6035:
case 6113:
case 6164:
case 6332:
case 6721:
case 6977:
case 7025:
case 7378:
case 7581:
case 7916:
case 7941:
case 8042:
case 8206:
case 8689:
strValue="E";
break;
case 6176:
case 6284:
strValue="EN";
break;
case 5706:
case 6939:
case 7177:
case 7879:
case 8025:
case 8660:
strValue="ER";
break;
case 5950:
case 7732:
strValue="FA";
break;
case 6212:
case 6232:
case 6506:
case 7283:
case 7660:
case 7818:
case 8576:
strValue="FAN";
break;
case 5890:
case 7242:
case 7853:
case 8419:
case 8648:
strValue="FANG";
break;
case 6032:
case 6584:
case 6713:
case 6839:
case 6990:
case 7119:
case 7328:
case 7572:
case 7619:
case 7673:
case 7948:
case 8082:
case 8267:
case 8385:
case 8468:
case 8613:
case 8678:
strValue="FEI";
break;
case 5739:
case 6915:
case 7291:
case 8687:
case 8787:
strValue="FEN";
break;
case 5726:
case 5926:
case 6155:
case 6384:
case 6767:
case 7731:
strValue="FENG";
break;
case 8330:
strValue="FOU";
break;
case 5775:
case 5776:
case 5914:
case 6029:
case 6062:
case 6119:
case 6142:
case 6252:
case 6327:
case 6505:
case 6686:
case 6870:
case 6985:
case 7058:
case 7066:
case 7106:
case 7108:
case 7285:
case 7471:
case 7680:
case 7741:
case 7774:
case 7775:
case 7823:
case 7991:
case 8005:
case 8222:
case 8261:
case 8280:
case 8283:
case 8479:
case 8535:
case 8538:
case 8654:
case 8691:
strValue="FU";
break;
case 6246:
case 7056:
case 7057:
case 7424:
case 7837:
strValue=" GA";
break;
case 5604:
case 5875:
case 5982:
case 7414:
case 7464:
strValue="GAI";
break;
case 5965:
case 6053:
case 6247:
case 6306:
case 6779:
case 6838:
case 6887:
case 7104:
case 7347:
case 7426:
case 7723:
case 8065:
case 8491:
strValue="GAN";
break;
case 7716:
case 7824:
case 8364:
strValue="GANG";
break;
case 5626:
case 5830:
case 5912:
case 6227:
case 7141:
case 7332:
case 7334:
case 7429:
case 7915:
strValue="GAO";
break;
case 5610:
case 5678:
case 5933:
case 5957:
case 6010:
case 6435:
case 7092:
case 7501:
case 7585:
case 7749:
case 7951:
case 8143:
case 8220:
case 8420:
case 8732:
strValue="GE";
break;
case 5608:
case 6102:
case 6371:
case 8462:
strValue="GEN";
break;
case 6376:
case 6657:
case 7114:
case 8665:
strValue="GENG";
break;
case 7178:
case 7537:
case 8228:
case 8601:
strValue="GONG";
break;
case 5694:
case 5824:
case 6524:
case 6960:
case 7037:
case 7135:
case 7259:
case 7477:
case 7616:
case 8349:
case 8384:
case 8724:
strValue="GOU";
break;
case 5637:
case 5812:
case 6152:
case 6536:
case 6773:
case 7284:
case 7379:
case 7484:
case 7486:
case 7591:
case 7617:
case 7813:
case 7825:
case 7860:
case 7932:
case 8019:
case 8083:
case 8233:
case 8494:
case 8593:
case 8681:
case 8729:
strValue="GU";
break;
case 5652:
case 5820:
case 6341:
case 7273:
case 7550:
case 8027:
strValue="GUA";
break;
case 5736:
case 6124:
case 6272:
case 6842:
case 7834:
case 8057:
case 8170:
case 8704:
strValue="GUAN";
break;
case 6359:
case 6578:
case 7270:
case 7555:
strValue="GUANG";
break;
case 5648:
case 5659:
case 6649:
case 7003:
case 7277:
case 7433:
case 7448:
case 8007:
case 8394:
case 8657:
case 8712:
strValue="GUI";
break;
case 5782:
case 7121:
case 7762:
case 8671:
strValue="GUN";
break;
case 5769:
case 6266:
case 6335:
case 6494:
case 6538:
case 6603:
case 7304:
case 7529:
case 8188:
case 8268:
case 8269:
strValue="GUO";
break;
case 7894:
strValue="HA";
break;
case 6443:
case 7560:
case 8516:
strValue="HAI";
break;
case 5885:
case 6153:
case 6294:
case 6759:
case 6911:
case 7447:
case 7642:
case 8192:
case 8205:
case 8232:
case 8793:
strValue="HAN";
break;
case 6776:
case 7112:
case 8194:
strValue="HANG";
break;
case 6179:
case 6222:
case 6438:
case 6467:
case 6909:
case 6916:
case 7427:
case 8009:
case 8211:
case 8226:
strValue="HAO";
break;
case 5813:
case 5932:
case 5954:
case 6432:
case 6756:
case 7434:
case 7833:
case 8202:
case 8234:
case 8471:
strValue="HE";
break;
case 6231:
case 7181:
case 7276:
strValue="HENG";
break;
case 5768:
case 5774:
case 5807:
case 6106:
case 6214:
case 6216:
case 6740:
case 6792:
strValue="HONG";
break;
case 6009:
case 6565:
case 6943:
case 8090:
case 8383:
case 8455:
case 8655:
case 8731:
strValue="HOU";
break;
case 5792:
case 6392:
case 6481:
case 6518:
case 6609:
case 6679:
case 6717:
case 6816:
case 6879:
case 7190:
case 7346:
case 7385:
case 7618:
case 7635:
case 7646:
case 7670:
case 7672:
case 7679:
case 8013:
case 8032:
case 8041:
case 8055:
case 8343:
case 8513:
case 8590:
strValue="HU";
break;
case 7072:
case 7275:
case 7725:
case 7892:
strValue="HUA";
break;
case 8555:
strValue="HUAI";
break;
case 5928:
case 6140:
case 6307:
case 6487:
case 6621:
case 6801:
case 6829:
case 6881:
case 6930:
case 6953:
case 7157:
case 7944:
case 8673:
case 8763:
strValue="HUAN";
break;
case 5882:
case 6569:
case 6850:
case 6874:
case 6956:
case 7211:
case 7533:
case 8105:
case 8308:
case 8382:
case 8692:
strValue="HUANG";
break;
case 5822:
case 6078:
case 6086:
case 6205:
case 6352:
case 6360:
case 6425:
case 6736:
case 6807:
case 6811:
case 6971:
case 7132:
case 7185:
case 7445:
case 7703:
case 8219:
case 8319:
case 8766:
strValue="HUI";
break;
case 5827:
case 6638:
case 6752:
case 6867:
strValue="HUN";
break;
case 5669:
case 6229:
case 6311:
case 6475:
case 6623:
case 7856:
case 7933:
case 7976:
case 8175:
case 8322:
strValue="HUO";
break;
case 5629:
case 5632:
case 566

抱歉!评论已关闭.