<?
/**
 * Word-overlap similarity between strings.
 *
 * Strings are lower-cased, split into unique whitespace-separated words and
 * stripped of the tokens listed in $commonWords. The similarity of two strings
 * is the number of shared words divided by the average number of words in the
 * two strings (equivalently 2*|A n B| / (|A| + |B|)).
 */
class StringComparer {
    /**
     * Lower-case tokens that are ignored when comparing. A token is only
     * removed when it is a whole whitespace-separated word of the input.
     *
     * @var array
     */
    public $commonWords = array(' ', ',', '.', 'mb', 'kb'); // add more

    /**
     * Minimum similarity a candidate needs to be included in the result of
     * compareStrings() when a list of candidates is given (0.5 means 50%).
     * It is not applied when a single string is compared.
     * The property name keeps its original spelling because it is public.
     *
     * @var float
     */
    public $treshold = 0.5;

    /**
     * Per-instance memo of excludeCommonWords() results, keyed by raw input.
     *
     * @var array
     */
    private $wordCache = array();

    /**
     * Snapshot of $commonWords the memo was built with; null before first use.
     *
     * @var array|null
     */
    private $wordCacheCommonWords = null;

    /**
     * Convert a string to its list of unique lower-cased words, excluding
     * the common words.
     *
     * @param string $word
     * @return array
     */
    protected function excludeCommonWords($word) {
        // $commonWords is public and may be changed at any time; results
        // computed with a different list must not be reused.
        if ($this->wordCacheCommonWords !== $this->commonWords) {
            $this->wordCache = array();
            $this->wordCacheCommonWords = $this->commonWords;
        }

        // Look up and store under the same key (the raw input).
        $key = (string) $word;
        if (isset($this->wordCache[$key])) {
            return $this->wordCache[$key];
        }

        // Multibyte-aware lower-casing when mbstring is available, so that
        // non-ASCII words differing only in case compare equal.
        // NOTE(review): assumes input is UTF-8 - confirm against callers.
        $normalized = function_exists('mb_strtolower')
            ? mb_strtolower($key, 'UTF-8')
            : strtolower($key);

        // Split on any run of whitespace and drop empty pieces, so repeated
        // spaces, tabs or newlines never produce empty "words".
        $words = preg_split('/\s+/u', $normalized, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            // The /u pattern fails on invalid UTF-8; fall back to a byte-wise split.
            $words = array_filter(explode(' ', trim($normalized)), 'strlen');
        }

        $words = array_values(array_diff(array_unique($words), $this->commonWords));

        return $this->wordCache[$key] = $words;
    }

    /**
     * Compare the words of two strings.
     *
     * @param string $s1
     * @param string $s2
     * @return float Share of common words, e.g. 0.8 means 80% of same words;
     *               0.0 when neither string contains any significant word.
     */
    protected function compareStringsOne($s1, $s2) {
        $a1 = $this->excludeCommonWords($s1);
        $a2 = $this->excludeCommonWords($s2);

        $total = count($a1) + count($a2);
        if ($total === 0) {
            // Nothing to compare; avoids a division by zero.
            return 0.0;
        }

        $same = count(array_intersect($a1, $a2)); // found same words

        // Same value as $same / (($total) / 2), always returned as float.
        return (float) (2 * $same / $total);
    }

    /**
     * Compare the words of a string with another string or with each string
     * of a list.
     *
     * @param string $s1
     * @param string|array $s2
     * @return float|array For a string $s2: the similarity as a float.
     *                     For an array $s2: a map "candidate => similarity"
     *                     holding only candidates whose similarity is at
     *                     least $treshold, sorted by similarity descending:
     *                       string2 => 0.9
     *                       string string => 0.5
     */
    public function compareStrings($s1, $s2) {
        if (!is_array($s2)) {
            return $this->compareStringsOne($s1, $s2);
        }

        $result = array();
        foreach ($s2 as $s) {
            $probability = $this->compareStringsOne($s1, $s);
            if ($probability >= $this->treshold) {
                $result[$s] = $probability;
            }
        }
        arsort($result);

        return $result;
    }
}
// Demo of StringComparer: compare one reference string against a single
// candidate, then against a list of candidates.
$comparer = new StringComparer();

// Reference string and a single candidate: prints the similarity as a percentage.
$s1 = 'Плата SVGA Gigabyte PCI-E GV-NX85T256H GF 8500GT 256Mb DDR2 DVI TV-Out OEM';
$s2 = 'PCI-E DDR-2 Gigabyte GV-NX85T256H DVI SLI GeForce 8500GT';
echo '<p>Вероятность: ', $comparer->compareStrings($s1, $s2) * 100, '%';

// Same reference string against several candidates: prints the
// "candidate => similarity" map returned by compareStrings().
$a = array(
    'PCI-E DDR-2 Gigabyte GV-NX85T256H DVI SLI GeForce 8500GT',
    'Плата SVGA Gigabyte PCI-E GV-NX85T256H GF 8500GT 256Mb DDR2 DVI TV-Out OEM',
    'Плата SVGA Gigabyte PCI-E GV-NX85T256H GF 8500GT 256Mb DDR2 DVI TV-Out',
    'DDR-2 Gigabyte GV-NX85T256H DVI SLI GeForce',
);
echo '<p>Вероятности: <pre>', print_r($comparer->compareStrings($s1, $a), true), '</pre>';
?>