3 namespace Caxy\HtmlDiff\Strategy;
5 use Caxy\HtmlDiff\Preprocessor;
/**
 * Match strategy that compares two strings using text similarity and the
 * length of their shared prefix/suffix, governed by three thresholds that
 * are supplied through the constructor.
 */
7 class ListItemMatchStrategy implements MatchStrategyInterface
// Minimum similar_text() percentage accepted by isMatch(); the computed
// percentage is compared against it with >=.
12 protected $similarityThreshold;
// Exclusive lower bound isMatch() applies to the ratio between the shorter
// cleaned string's length and the longer raw input's length.
17 protected $lengthRatioThreshold;
// Exclusive lower bound isMatch() applies to the fraction of the shorter
// cleaned string that is covered by the common prefix plus common suffix.
22 protected $commonTextRatioThreshold;
/**
 * Set up the thresholds that isMatch() compares its measurements against.
 *
 * @param int   $similarityThreshold      Minimum similar_text() percentage treated as a match.
 * @param float $lengthRatioThreshold     Value the shorter-to-longer length ratio must exceed.
 * @param float $commonTextRatioThreshold Value the shared prefix/suffix fraction must exceed.
 */
public function __construct(
    $similarityThreshold = 80,
    $lengthRatioThreshold = 0.1,
    $commonTextRatioThreshold = 0.6
) {
    $this->similarityThreshold = $similarityThreshold;
    $this->lengthRatioThreshold = $lengthRatioThreshold;
    $this->commonTextRatioThreshold = $commonTextRatioThreshold;
}
/**
 * Decide whether $a and $b should be treated as matching items.
 *
 * The visible checks run in order: similarity of the tag-stripped text,
 * similarity of the raw strings, then how much of the shorter string is
 * covered by a prefix/suffix shared with the other string.
 *
 * NOTE(review): the bodies of the conditionals and the end of the method are
 * not visible here; presumably each satisfied check returns true and the
 * method otherwise returns false - confirm against the full file.
 *
 * @param string $a First string; may contain markup (it is passed to strip_tags()).
 * @param string $b Second string; may contain markup (it is passed to strip_tags()).
 */
44 public function isMatch($a, $b)
48 // Strip tags and check similarity
49 $aStripped = strip_tags($a);
50 $bStripped = strip_tags($b);
// similar_text() writes the similarity, as a percentage, into $percentage
// through its by-reference third argument.
51 similar_text($aStripped, $bStripped, $percentage);
53 if ($percentage >= $this->similarityThreshold) {
57 // Check w/o stripped tags
// Same measurement on the raw inputs, so markup also counts towards similarity.
58 similar_text($a, $b, $percentage);
59 if ($percentage >= $this->similarityThreshold) {
63 // Check common prefix/ suffix length
64 $aCleaned = trim($aStripped);
65 $bCleaned = trim($bStripped);
// NOTE(review): the same emptiness test appears twice in a row. The body of
// the first is not visible; presumably it substitutes other values for the
// cleaned strings before the second test runs - confirm.
66 if (mb_strlen($aCleaned) === 0 || mb_strlen($bCleaned) === 0) {
70 if (mb_strlen($aCleaned) === 0 || mb_strlen($bCleaned) === 0) {
// Presumably these return the character counts of the shared prefix and
// shared suffix (they are summed and compared with string lengths below);
// verify against Preprocessor.
73 $prefixIndex = Preprocessor::diffCommonPrefix($aCleaned, $bCleaned);
74 $suffixIndex = Preprocessor::diffCommonSuffix($aCleaned, $bCleaned);
76 // Use shorter string, and see how much of it is leftover
77 $len = min(mb_strlen($aCleaned), mb_strlen($bCleaned));
// NOTE(review): if the shared prefix and suffix can overlap (e.g. "ab" vs
// "abab"), $remaining becomes negative and the strict "=== 0" test below is
// skipped even though the shorter string is fully covered - confirm intent.
78 $remaining = $len - ($prefixIndex + $suffixIndex);
// The denominator uses the raw input lengths, not the cleaned ones. It is
// zero only when both inputs are empty - presumably ruled out by the
// emptiness checks above; confirm, since dividing by zero is an error.
79 $strLengthPercent = $len / max(mb_strlen($a), mb_strlen($b));
81 if ($remaining === 0 && $strLengthPercent > $this->lengthRatioThreshold) {
// Fraction of the shorter cleaned string covered by the shared prefix + suffix.
85 $percentCommon = ($prefixIndex + $suffixIndex) / $len;
// NOTE(review): 0.1 is hard-coded here while the check above uses
// $this->lengthRatioThreshold. It equals that property's constructor default,
// so a custom lengthRatioThreshold is ignored by this condition - likely
// should be $this->lengthRatioThreshold; confirm.
87 if ($strLengthPercent > 0.1 && $percentCommon > $this->commonTextRatioThreshold) {