一开始写这个函数时反复调试、测试了很多次,但结果始终不尽如人意,因为它的判断逻辑本身就有问题,后来改用 similar_text 替代。
功能:按段落(行)逐一对比,检测提交的文本是否存在过度重复。
备注:误伤率比较低,但过滤效果不理想。
/**
 * Repeated-paragraph detection: report whether the submitted text repeats
 * identical lines too often.
 *
 * The text is stripped of a fixed set of noise characters, split into lines,
 * and every line that exactly matches an earlier line counts as one repeat.
 * Repeats are tallied per length bucket (lengths are measured in BYTES via
 * strlen(), so a multi-byte character counts for more than one):
 *
 *   - 1..12 bytes   : flagged on the 5th repeat
 *   - 13..50 bytes  : flagged on the 4th repeat
 *   - 51..100 bytes : flagged on the 3rd repeat
 *   - over 100 bytes: flagged on the 1st repeat
 *
 * Each tally is shared by all lines of that bucket; it is not kept per
 * distinct line. Empty lines are ignored.
 *
 * @param string $string Text to inspect; LF, CRLF and CR line endings are accepted.
 *
 * @return bool True when the text is considered excessively repetitive.
 */
public function hasRepeatLine($string)
{
    // Strip noise characters so that padding a line with them cannot hide a repeat.
    // NOTE(review): the original search list contained "," twice; the duplicate
    // may have been meant as the full-width comma — confirm against the
    // upstream source before adding it.
    $string = str_replace(array("\t", " ", "@", "#", "。", ",", "."), '', (string) $string);

    // Treat CR as a line break too. CRLF then yields an extra empty line, which
    // is skipped below, so "\r" never becomes part of a line or a line of its own.
    $string = str_replace("\r", "\n", $string);

    $countShort = $countMiddle = $countLong = 0;

    // Lines seen so far, stored as array keys: O(1) lookup and exact string
    // matching (a loose in_array() would treat "10", "1e1" and "010" as equal).
    $seen = array();

    foreach (explode("\n", $string) as $lineString) {
        $length = strlen($lineString);
        if ($length < 1) {
            continue;
        }

        if (isset($seen[$lineString])) {
            if ($length < 13) {
                if (++$countShort > 4) {
                    return true; // 5th repeat of short lines
                }
            } elseif ($length < 51) {
                if (++$countMiddle > 3) {
                    return true; // 4th repeat of medium lines
                }
            } elseif ($length < 101) {
                if (++$countLong > 2) {
                    return true; // 3rd repeat of long lines
                }
            } else {
                // A repeated line of more than 100 bytes is flagged immediately.
                return true;
            }
        }

        $seen[$lineString] = true;
    }

    return false;
}