现在的位置: 首页 > 综合 > 正文

将字符串utf-8编码后再url编码

2013年09月26日 ⁄ 综合 ⁄ 共 4511字 ⁄ 字号 评论关闭

背景:客户端(C++编写)需要打开一个web查询页,因此要拼接一个查询的url并在浏览器中打开

 

比如在google中查询"马"

http://www.google.cn/search?hl=zh-CN&newwindow=1&q=%E9%A9%AC&aq=f&oq=

 

注意这个"马"被utf-8编码后再url编码,这在java,js,php,c#中都有类似urlencode,escape等方法直接拿来用,我都搞定了,就剩下这个c++客户端了,却没有现成的API直接用

 

 

下面就是要用c++实现这个功能

首先是转utf-8编码,这就需要使用LibIconv(http://gnuwin32.sourceforge.net/packages/libiconv.htm),将头文件和库包含进去后,就可以使用convertcode函数了

 

其次是url编码,这次我又从http://www.codeguru.com/cpp/cpp/cpp_mfc/article.php/c4029/上找到了方法,大家可以参考下

 

 

下面分享代码:

/*****************************************************************************
Module :     URLEncode.H
Notices:     Written 2002 by ChandraSekar Vuppalapati
Description: H URL Encoder
*****************************************************************************/
#ifndef __CURLENCODE_H_
#define __CURLENCODE_H_

#include "stdafx.h"

class CURLEncode
{
private:
 static CString csUnsafeString;
 CString decToHex(char num, int radix);
 bool isUnsafe(char compareChar);
 CString convert(char val);

public:
 CURLEncode() { };
 virtual ~CURLEncode() { };
 CString URLEncode(CString vData);
 int convertcode(const char *inbuf,int inlen,char *outbuf,int outlen);
};

#endif //__CURLENCODE_H_

 

 

 

/*****************************************************************************
Module :     URLEncode.cpp
Notices:     Written 2002 by ChandraSekar Vuppalapati
Description: CPP URL Encoder
*****************************************************************************/
#define _CRTDBG_MAP_ALLOC

#include "stdafx.h"
#include <math.h>
#include <malloc.h>
#include <memory.h>
#include <new.h>
#include <stdlib.h>
#include <string.h>
#include <WININET.H>

#include "URLEncode.h"
#include "iconv.h"

#define MAX_BUFFER_SIZE 4096
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

#include <stdlib.h>
#include <crtdbg.h>
// Digit characters indexed by value (0-15); used to build the "%XX" escapes.
char hexVals[16] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};

// Printable characters that must always be percent-encoded.
// The double quote and the backslash have to be escaped inside the literal;
// without the escapes the string ends early and the rest is parsed as code.
CString CURLEncode::csUnsafeString= "\"<>%\\^[]`+$,@:;/!#?=&";

// Builds the percent-escape for a single character: a '%' sign followed by
// the base-16 digits that decToHex() produces for val.
CString CURLEncode::convert(char val)
{
 CString csEscaped("%");
 csEscaped += decToHex(val, 16);
 return csEscaped;
}

// Helper: renders the byte value (0-255) of num as a string of digits in the
// given radix, most significant digit first, left-padded with '0' to at least
// two characters (so radix 16 always yields exactly two hex digits).
//
// num   - character to format; it is interpreted as an unsigned byte.
// radix - numeric base, 2..16. Any other value falls back to 16 because
//         hexVals only provides 16 digit characters and a radix below 2
//         would never terminate.
CString CURLEncode::decToHex(char num, int radix)
{
 if (radix < 2 || radix > 16)
  radix = 16;

 // Where plain char is signed, bytes 0x80-0xFF arrive as negative numbers;
 // going through unsigned char maps them back to 128-255.
 int value = static_cast<unsigned char>(num);

 // Digits are produced least-significant first and reversed at the end.
 // Each digit must be appended (not assigned), otherwise all but the last
 // one are lost whenever the value needs more than two digits.
 CString csDigits;
 while (value >= radix)
 {
  csDigits += hexVals[value % radix];
  value /= radix;
 }
 csDigits += hexVals[value];

 // Pad single-digit results; the '0' is appended here because the string
 // is still in reverse order.
 if (csDigits.GetLength() < 2)
 {
  csDigits += '0';
 }

 csDigits.MakeReverse();
 return csDigits;
}

// PURPOSE OF THIS FUNCTION IS TO CHECK TO SEE IF A CHAR IS URL UNSAFE.
// TRUE = UNSAFE, FALSE = SAFE
bool CURLEncode::isUnsafe(char compareChar)
{
 bool bcharfound = false;
 char tmpsafeChar;
 int m_strLen = 0;
 
 m_strLen = csUnsafeString.GetLength();
 for(int ichar_pos = 0; ichar_pos < m_strLen ;ichar_pos++)
 {
  tmpsafeChar = csUnsafeString.GetAt(ichar_pos);
  if(tmpsafeChar == compareChar)
  {
   bcharfound = true;
   break;
  }
 }
 int char_ascii_value = 0;
 //char_ascii_value = __toascii(compareChar);
 char_ascii_value = (int) compareChar;

 if(bcharfound == false &&  char_ascii_value > 32 && char_ascii_value < 123)
 {
  return false;
 }
 // found no unsafe chars, return false  
 else
 {
  return true;
 }
 
 return true;
}
// Converts a string to its URL-encoded form.
// Each character is examined on its own: characters for which isUnsafe()
// returns false are copied unchanged, all others are replaced by the "%XX"
// escape produced by convert().
//
// pcsEncode - text to encode. To obtain a UTF-8 based URL, pass text that has
//             already been converted to UTF-8 (see convertcode()).
//             NOTE(review): this presumes a narrow-character build in which
//             GetAt() yields one byte per call - confirm for Unicode builds.
// Returns the encoded string; an empty input yields an empty result.
CString CURLEncode::URLEncode(CString pcsEncode)
{
 CString csEncoded;
 const int length = pcsEncode.GetLength();

 for (int pos = 0; pos < length; pos++)
 {
  const char ch = pcsEncode.GetAt(pos);
  if (isUnsafe(ch))
  {
   // Replace the character by its hexadecimal escape.
   csEncoded += convert(ch);
  }
  else
  {
   // Safe character: copy as is.
   csEncoded += ch;
  }
 }

 return csEncoded;
}

//convert gb2312 to utf-8
int CURLEncode::convertcode(const char *inbuf,int inlen,char *outbuf,int outlen)
{
        iconv_t handle;
 
        const char **pin=&inbuf;
 
        char **pout=&outbuf;
 
        handle=iconv_open("utf-8","gb2312");
 
        if( handle == 0 )
                printf("error!/n");
 
        memset(outbuf,0,outlen);
 
        if(iconv(handle,pin,(size_t *)&inlen,pout,(size_t *)&outlen) == -1){
                printf("iconv error!/n");
                return -1;
        }
 
        iconv_close(handle);
 
        return 0;
}

 

 

 

 

 

main(){

 

     CURLEncode url_encode;

     int len1,len2=20;

     char *apin="七星";

        len1=strlen(apin);

        char apout[20];

        url_encode.convertcode(apin,len1,apout,len2);

    AfxMessageBox(apout);

 

}

 

抱歉!评论已关闭.