截取字符函数

支持中文的截取字符函数,不同编码下中文字符的范围一目了然

<?php
/**
 * Multi-byte aware substring for UTF-8, GB2312, GBK and Big5 text.
 *
 * Uses mb_substr() when the mbstring extension is loaded. Otherwise the
 * string is split into characters with a per-charset byte-range pattern and
 * the resulting list is sliced. Both paths honour $suffix.
 *
 * Declared without a visibility keyword so it is valid as a plain function;
 * if pasted into a class body it is still public by default.
 *
 * @param string   $str     String to cut.
 * @param int      $start   Zero-based index of the first character to keep.
 * @param int|null $length  Number of characters to keep; null keeps everything from $start on.
 * @param string   $charset utf-8|gb2312|gbk|big5 (matched case-insensitively).
 * @param bool     $suffix  When true, "…" is appended to the result.
 *
 * @return string The selected characters, followed by the suffix if requested.
 *
 * @throws InvalidArgumentException When mbstring is unavailable and $charset
 *                                  has no fallback pattern.
 */
function csubstr($str, $start = 0, $length = null, $charset = "utf-8", $suffix = true)
{
    if (function_exists("mb_substr")) {
        $slice = mb_substr($str, $start, $length, $charset);
    } else {
        // One alternative per character width: a single ASCII byte, or a
        // lead byte followed by the trail byte(s) valid for that encoding.
        $patterns = array(
            'utf-8'  => '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/',
            'gb2312' => '/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/',
            'gbk'    => '/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/',
            'big5'   => '/[\x01-\x7f]|[\x81-\xfe]([\x40-\x7e]|[\xa1-\xfe])/',
        );

        // mb_substr() accepts encoding names in any case; mirror that here.
        $key = strtolower($charset);
        if (!isset($patterns[$key])) {
            throw new InvalidArgumentException("Unsupported charset: " . $charset);
        }

        preg_match_all($patterns[$key], $str, $match);
        $slice = implode("", array_slice($match[0], $start, $length));
    }

    // The suffix is applied after either path so the result does not depend
    // on which extensions happen to be loaded.
    return $suffix ? $slice . "…" : $slice;
}
 
// GBK-specific truncation helper: cuts only when the string is over the
// limit, otherwise returns it as-is (apart from optional HTML escaping).
/**
 * Truncate a GBK-encoded string when it exceeds a byte budget.
 *
 * Width is measured in bytes: an ASCII character counts as one, a GBK
 * double-byte character counts as two. A string that already fits is
 * returned without the end marker. A longer string is cut on a character
 * boundary to at most ($length - 3) bytes, the 3 being room for a visible
 * "...", and $end_with is appended.
 *
 * @param string $string              GBK-encoded input.
 * @param int    $length              Maximum width in bytes.
 * @param mixed  $is_htmlspecialchars Truthy to HTML-escape the returned text
 *                                    ($end_with itself is never escaped).
 * @param string $end_with            Marker appended after a truncated string.
 *
 * @return string
 */
function gbk_substr_ifneed($string, $length, $is_htmlspecialchars=0, $end_with="<span class=\"dot\">...</span>")
{
    $fits = strlen($string) <= $length;

    if ($fits) {
        $kept = $string;
    } else {
        // A character is either one ASCII byte or a GBK lead/trail byte pair.
        $re_gbk = "/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/";
        preg_match_all($re_gbk, $string, $match);

        // Fixed 3-byte reserve rather than strlen($end_with), because the
        // default marker is HTML markup much longer than what it displays.
        $max_length = $length - 3;

        $kept = "";
        $now_length = 0;
        foreach ($match[0] as $char) {
            $now_length += strlen($char);
            if ($now_length > $max_length) {
                break;
            }
            $kept .= $char;
        }
    }

    if ($is_htmlspecialchars) {
        // Escape byte-wise with a single-byte charset. The default UTF-8
        // handling would reject or substitute GBK bytes. Byte-wise escaping
        // is safe because GBK trail bytes are 0x40-0xFE, while & " ' < >
        // are all below 0x40, so no half of a double-byte character is ever
        // mistaken for a special character.
        $kept = htmlspecialchars($kept, ENT_QUOTES, 'ISO-8859-1');
    }

    // Only a truncated string gets the end marker.
    return $fits ? $kept : $kept . $end_with;
}
php/substr_chinese_charsets.txt · 最后更改: 2010/04/30 15:38 由 admin
 
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki