现在的位置: 首页 > 综合 > 正文

php中文判断,截取

2012年09月18日 ⁄ 综合 ⁄ 共 2132字 ⁄ 字号 评论关闭

/**
	 * Report whether a UTF-8 string contains at least one Chinese (Han) character.
	 *
	 * The check relies on the Unicode script property "Han", so accented Latin
	 * letters, Cyrillic, emoji and other non-ASCII text are not mistaken for
	 * Chinese. Non-scalar input, the empty string and malformed UTF-8 all
	 * yield false.
	 *
	 * @param string $wd Text to inspect, expected to be UTF-8 encoded.
	 * @return bool True if any Han character is present, false otherwise.
	 */
	function getIsChinese($wd) {
		if (!is_scalar($wd)) {
			return false;
		}
		$text = (string) $wd;
		if ($text === '') {
			return false;
		}
		// preg_match() returns false when the subject is not valid UTF-8;
		// comparing against 1 folds that error case into "not Chinese".
		return preg_match('/\p{Han}/u', $text) === 1;
	}
	
	/**
	 * Count the characters (not bytes) in a UTF-8 string.
	 *
	 * Every code point, including newlines, counts as one unit. A null
	 * argument and malformed UTF-8 both produce 0.
	 *
	 * @param string|null $string UTF-8 text to measure.
	 * @return int Number of characters found.
	 */
	function utf8_strlen($string = null) {
		// Avoid handing null to PCRE, which is deprecated as of PHP 8.1.
		if ($string === null) {
			return 0;
		}
		// "u" treats the subject as UTF-8; "s" lets "." match newlines too.
		$count = preg_match_all ( "/./us", $string, $units );
		// preg_match_all() returns false on a PCRE error such as invalid UTF-8.
		return $count === false ? 0 : $count;
	}
	
	
	/**
	 * Cut a substring measured in characters rather than bytes.
	 *
	 * Uses mbstring when it is available; otherwise the input is split into
	 * characters with a per-charset regular expression. The ellipsis suffix
	 * is appended only when characters following the slice were dropped, so
	 * a slice that reaches the end of the input is returned as-is.
	 *
	 * @param string   $str     Text to cut.
	 * @param int      $start   Zero-based character offset at which the slice begins.
	 * @param int|null $length  Maximum number of characters to keep; null keeps everything from $start onward.
	 * @param string   $charset utf-8, gb2312, gbk or big5 (matched case-insensitively in the fallback path).
	 * @param bool     $suffix  Whether to append "…" when the slice was cut short.
	 * @return string The requested slice, optionally followed by "…".
	 * @throws \InvalidArgumentException When mbstring is unavailable and $charset is not supported.
	 */
	public function csubstr($str, $start = 0, $length = null, $charset = "utf-8", $suffix = true) {
		if (function_exists ( "mb_substr" )) {
			// Take everything from $start first so truncation can be detected
			// relative to the requested offset, not the whole string.
			$rest = mb_substr ( $str, $start, mb_strlen ( $str, $charset ), $charset );
			$slice = $length === null ? $rest : mb_substr ( $rest, 0, $length, $charset );
			$truncated = $slice !== $rest;
		} else {
			$re = array ();
			$re ['utf-8'] = "/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/";
			$re ['gb2312'] = "/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/";
			$re ['gbk'] = "/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/";
			// Big5 trail bytes span two ranges; both belong in one character class.
			$re ['big5'] = "/[\x01-\x7f]|[\x81-\xfe][\x40-\x7e\xa1-\xfe]/";

			$key = strtolower ( $charset );
			if (! isset ( $re [$key] )) {
				throw new \InvalidArgumentException ( "Unsupported charset: " . $charset );
			}

			preg_match_all ( $re [$key], $str, $match );
			$restChars = array_slice ( $match [0], $start );
			$sliceChars = $length === null ? $restChars : array_slice ( $restChars, 0, $length );
			$truncated = count ( $sliceChars ) < count ( $restChars );
			$slice = join ( "", $sliceChars );
		}

		if ($suffix && $truncated) {
			return $slice . "…";
		}
		return $slice;
	}

抱歉!评论已关闭.