similarityThreshold = $similarityThreshold;
$this->lengthRatioThreshold = $lengthRatioThreshold;
$this->commonTextRatioThreshold = $commonTextRatioThreshold;
}
/**
 * Decides whether two (possibly HTML) strings are similar enough to count as a match.
 *
 * The checks run in order and the first one that succeeds returns true:
 *  1. the similar_text() percentage of the tag-stripped strings reaches the similarity threshold;
 *  2. the similar_text() percentage of the raw strings reaches the similarity threshold;
 *  3. the shorter cleaned string is completely covered by the common prefix/suffix and its
 *     length, relative to the longer raw input, exceeds the length ratio threshold;
 *  4. the common prefix/suffix covers a share of the shorter cleaned string that exceeds the
 *     common text ratio threshold.
 *
 * @param string $a
 * @param string $b
 *
 * @return bool True when any of the checks above succeeds, false otherwise.
 */
public function isMatch($a, $b)
{
    $percentage = null;

    // Strip tags and check similarity of the remaining text.
    $aStripped = strip_tags($a);
    $bStripped = strip_tags($b);
    similar_text($aStripped, $bStripped, $percentage);
    if ($percentage >= $this->similarityThreshold) {
        return true;
    }

    // Check w/o stripped tags. When stripping changed neither input, this would repeat the
    // exact comparison that just failed, so the (expensive) call is skipped.
    if ($aStripped !== $a || $bStripped !== $b) {
        similar_text($a, $b, $percentage);
        if ($percentage >= $this->similarityThreshold) {
            return true;
        }
    }

    // Check common prefix/suffix length on the trimmed, tag-free text; fall back to the raw
    // inputs when either side has no text left after stripping and trimming.
    $aCleaned = trim($aStripped);
    $bCleaned = trim($bStripped);
    if (strlen($aCleaned) === 0 || strlen($bCleaned) === 0) {
        $aCleaned = $a;
        $bCleaned = $b;
    }

    // Nothing to compare (this also guarantees the divisions below are never by zero).
    if (strlen($aCleaned) === 0 || strlen($bCleaned) === 0) {
        return false;
    }

    // Presumably these return the lengths of the shared prefix and suffix — confirm against
    // Preprocessor.
    $prefixIndex = Preprocessor::diffCommonPrefix($aCleaned, $bCleaned);
    $suffixIndex = Preprocessor::diffCommonSuffix($aCleaned, $bCleaned);

    // Use shorter string, and see how much of it is leftover.
    $len = min(strlen($aCleaned), strlen($bCleaned));

    // The prefix and suffix can overlap on the shorter string (e.g. "aaa" vs "aaaa"), which
    // would make their sum exceed $len. Clamp it so the leftover never goes negative and the
    // common ratio never exceeds 1.
    $common = min($len, $prefixIndex + $suffixIndex);
    $remaining = $len - $common;

    // Length of the shorter cleaned string relative to the longer raw input.
    $strLengthPercent = $len / max(strlen($a), strlen($b));

    if ($remaining === 0 && $strLengthPercent > $this->lengthRatioThreshold) {
        return true;
    }

    $percentCommon = $common / $len;

    // NOTE(review): the lower bound here is a literal 0.1 rather than
    // $this->lengthRatioThreshold — confirm whether that is intentional.
    return $strLengthPercent > 0.1 && $percentCommon > $this->commonTextRatioThreshold;
}
}