// Stop executing this file unless the JPATH_PLATFORM constant has been defined.
11 defined(
'JPATH_PLATFORM') or die;
/**
 * Regular-expression fragment (a non-capturing group) that matches one consonant.
 *
 * It accepts any of the letters b c d f g h j k l m n p q r s t v w x z, and a
 * "y" when it directly follows one of a, e, i, o, u or stands at the very start
 * of the subject. The fragment carries no delimiters; methods of this class
 * embed it into complete patterns.
 *
 * @var  string
 */
private static $_regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)';
/**
 * Regular-expression fragment (a non-capturing group) that matches one vowel.
 *
 * It accepts any of a, e, i, o, u, and a "y" that is not directly preceded by
 * one of those five letters. The fragment carries no delimiters; methods of
 * this class embed it into complete patterns.
 *
 * @var  string
 */
private static $_regex_vowel = '(?:[aeiou]|(?<![aeiou])y)';
/**
 * Returns the stem of a token, memoised in $this->cache per language and token.
 *
 * On a cache miss the working value is passed through _step1ab, _step1c, _step2,
 * _step3, _step4 and _step5 in that order, and the outcome is stored in the cache
 * before being returned.
 *
 * NOTE(review): only part of this method is visible in this view — the body of
 * the length check and the initial value of $result are not shown. Confirm
 * against the full file before changing the logic.
 *
 * @param   string  $token  The token to stem; also the second-level cache key.
 * @param   string  $lang   The language key; used as the first-level cache key.
 *
 * @return  string  The cache entry for this language/token pair.
 */
50 public function stem($token, $lang)
// Tokens of at most two bytes are singled out before the main path.
53 if (strlen($token) <= 2)
// Run the stemming steps only when nothing is cached for this language/token pair.
65 if (!isset($this->cache[$lang][$token]))
// Each step receives the previous step's result.
69 $result = self::_step1ab($result);
70 $result = self::_step1c($result);
71 $result = self::_step2($result);
72 $result = self::_step3($result);
73 $result = self::_step4($result);
74 $result = self::_step5($result);
// Remember the outcome so later calls skip the steps above.
77 $this->cache[$lang][$token] = $result;
80 return $this->cache[$lang][$token];
/**
 * First stemming step: handles endings in "s", then "eed", "ing" and "ed",
 * followed by a tidy-up of the stem that is left after "ing"/"ed" was removed.
 *
 * NOTE(review): not every statement of this method is visible in this view
 * (see the note at the final elseif) — confirm against the full file.
 *
 * @param   string  $word  The word to process.
 */
92 private static function _step1ab($word)
// Only words ending in "s" are candidates for the rules below.
95 if (substr($word, -1) ==
's')
// The rules are tried in order and the chain stops at the first call that
// returns a truthy value. The "ss" => "ss" rule leaves the word unchanged.
97 self::_replace($word,
'sses',
'ss')
98 or self::_replace($word,
'ies',
'i')
99 or self::_replace($word,
'ss',
'ss')
100 or self::_replace($word,
's',
'');
// Continue when the second-to-last letter is not "e", or when the
// "eed" => "ee" rule (threshold 0) reports false.
104 if (substr($word, -2, 1) !=
'e' or !self::_replace($word,
'eed',
'ee', 0))
107 $v = self::$_regex_vowel;
// Strip "ing" or "ed" when the text in front of the suffix contains a vowel.
// "&&" binds tighter than "or", so each vowel test is paired with its own suffix rule.
111 if (preg_match(
"#$v+#", substr($word, 0, -3)) && self::_replace($word,
'ing',
'')
112 or preg_match(
"#$v+#", substr($word, 0, -2)) && self::_replace($word,
'ed',
''))
// Stems now ending in "at", "bl" or "iz" get an "e" appended by these rules;
// the checks below run only when none of the three calls returns a truthy value.
115 if (!self::_replace($word,
'at',
'ate') and !self::_replace($word,
'bl',
'ble') and !self::_replace($word,
'iz',
'ize'))
// A doubled final consonant other than "ll", "ss" or "zz" loses its last letter.
118 if (self::_doubleConsonant($word) and substr($word, -2) !=
'll' and substr($word, -2) !=
'ss' and substr($word, -2) !=
'zz')
120 $word = substr($word, 0, -1);
// NOTE(review): the statement guarded by this condition is not visible in this view.
122 elseif (self::_m($word) == 1 and self::_cvc($word))
/**
 * Turns a final "y" into "i" when the rest of the word contains a vowel.
 *
 * @param   string  $word  The word to process.
 *
 * @return  string  The processed word; unchanged when the rule does not apply.
 */
private static function _step1c($word)
{
    // Nothing to do unless the word ends in "y".
    if (substr($word, -1) != 'y')
    {
        return $word;
    }

    $vowel = self::$_regex_vowel;

    // The vowel may appear anywhere in the part that precedes the final "y".
    if (preg_match('#' . $vowel . '+#', substr($word, 0, -1)))
    {
        self::_replace($word, 'y', 'i');
    }

    return $word;
}
/**
 * Maps longer derivational suffixes onto shorter ones (for example
 * "ational" => "ate"), passing a threshold of 0 to _replace() for every rule.
 *
 * @param   string  $word  The word to process.
 *
 * @return  string  The processed word.
 */
private static function _step2($word)
{
    // Rules are indexed by the second-to-last letter of the suffix they handle,
    // so only candidates that can possibly match the word are tried.
    static $rules = array(
        'a' => array('ational' => 'ate', 'tional' => 'tion'),
        'c' => array('enci' => 'ence', 'anci' => 'ance'),
        'e' => array('izer' => 'ize'),
        'g' => array('logi' => 'log'),
        'l' => array('entli' => 'ent', 'ousli' => 'ous', 'alli' => 'al', 'bli' => 'ble', 'eli' => 'e'),
        'o' => array('ization' => 'ize', 'ation' => 'ate', 'ator' => 'ate'),
        's' => array('iveness' => 'ive', 'fulness' => 'ful', 'ousness' => 'ous', 'alism' => 'al'),
        't' => array('biliti' => 'ble', 'aliti' => 'al', 'iviti' => 'ive'),
    );

    $penultimate = substr($word, -2, 1);

    if (isset($rules[$penultimate]))
    {
        foreach ($rules[$penultimate] as $suffix => $replacement)
        {
            // Stop at the first rule whose call reports success.
            if (self::_replace($word, $suffix, $replacement, 0))
            {
                break;
            }
        }
    }

    return $word;
}
/**
 * Shortens or removes a further group of suffixes ("ical", "ness", "icate",
 * "iciti", "ful", "ative", "alize"), passing a threshold of 0 to _replace().
 *
 * @param   string  $word  The word to process.
 *
 * @return  string  The processed word.
 */
private static function _step3($word)
{
    // Rules are indexed by the second-to-last letter of the suffix they handle.
    static $rules = array(
        'a' => array('ical' => 'ic'),
        's' => array('ness' => ''),
        't' => array('icate' => 'ic', 'iciti' => 'ic'),
        'u' => array('ful' => ''),
        'v' => array('ative' => ''),
        'z' => array('alize' => 'al'),
    );

    $penultimate = substr($word, -2, 1);

    if (isset($rules[$penultimate]))
    {
        foreach ($rules[$penultimate] as $suffix => $replacement)
        {
            // Stop at the first rule whose call reports success.
            if (self::_replace($word, $suffix, $replacement, 0))
            {
                break;
            }
        }
    }

    return $word;
}
/**
 * Removes a residual suffix outright, passing a threshold of 1 to _replace()
 * for every rule.
 *
 * @param   string  $word  The word to process.
 *
 * @return  string  The processed word.
 */
private static function _step4($word)
{
    // Removable suffixes, indexed by their second-to-last letter. The letter
    // "o" is handled separately below because its rule has an extra condition.
    static $suffixes = array(
        'a' => array('al'),
        'c' => array('ance', 'ence'),
        'e' => array('er'),
        'i' => array('ic'),
        'l' => array('able', 'ible'),
        'n' => array('ant', 'ement', 'ment', 'ent'),
        's' => array('ism'),
        't' => array('ate', 'iti'),
        'u' => array('ous'),
        'v' => array('ive'),
        'z' => array('ize'),
    );

    $penultimate = substr($word, -2, 1);

    if ($penultimate == 'o')
    {
        // "ion" may only go when it follows "t" or "s"; otherwise try "ou".
        $ending     = substr($word, -4);
        $candidates = ($ending == 'tion' or $ending == 'sion') ? array('ion') : array('ou');
    }
    elseif (isset($suffixes[$penultimate]))
    {
        $candidates = $suffixes[$penultimate];
    }
    else
    {
        $candidates = array();
    }

    foreach ($candidates as $suffix)
    {
        // Stop at the first rule whose call reports success.
        if (self::_replace($word, $suffix, '', 1))
        {
            break;
        }
    }

    return $word;
}
/**
 * Final clean-up: may drop a trailing "e" and may reduce a trailing "ll" to "l".
 *
 * @param   string  $word  The word to process.
 *
 * @return  string  The processed word.
 */
private static function _step5($word)
{
    // Part a: a final "e" goes when the measure of the rest exceeds 1, or
    // equals 1 while the rest does not satisfy _cvc().
    if (substr($word, -1) == 'e')
    {
        $rest    = substr($word, 0, -1);
        $measure = self::_m($rest);

        if ($measure > 1 or ($measure == 1 and !self::_cvc($rest)))
        {
            self::_replace($word, 'e', '');
        }
    }

    // Part b: for a word with measure above 1 that ends in a doubled "l",
    // remove the last letter.
    if (self::_m($word) > 1 and self::_doubleConsonant($word) and substr($word, -1) == 'l')
    {
        $word = substr($word, 0, -1);
    }

    return $word;
}
/**
 * Replaces the suffix $check at the end of $str with $repl.
 *
 * $str is passed by reference and is modified in place. When $m is not null
 * the rewrite only happens if self::_m() of the part in front of the suffix
 * is greater than $m.
 *
 * NOTE(review): no return statement is visible in this view, although callers
 * use the result as a boolean in "or" chains and negations — confirm the
 * return values (and where they sit relative to the $m check) in the full file.
 *
 * @param   string   &$str   The string to examine and, possibly, rewrite.
 * @param   string   $check  The suffix to look for.
 * @param   string   $repl   The text that takes the place of the suffix.
 * @param   integer  $m      Optional threshold the measure must exceed; null disables the check.
 */
364 private static function _replace(&$str, $check, $repl, $m = null)
// Negative suffix length, used as an offset counted from the end of $str.
366 $len = 0 - strlen($check);
// Proceed only when $str ends with $check.
368 if (substr($str, $len) == $check)
// The part of the string in front of the suffix.
370 $substr = substr($str, 0, $len);
// Without a threshold always rewrite; otherwise require a measure above $m.
372 if (is_null($m) or self::_m($substr) > $m)
374 $str = $substr . $repl;
/**
 * Counts the vowel-run/consonant-run pairs in a string.
 *
 * Leading consonants and trailing vowels are removed first; the result is the
 * number of times "one or more vowels followed by one or more consonants"
 * occurs in what remains.
 *
 * @param   string  $str  The string to measure.
 *
 * @return  integer  The number of vowel-consonant sequences found.
 */
private static function _m($str)
{
    $consonant = self::$_regex_consonant;
    $vowel     = self::$_regex_vowel;

    // The two patterns are applied one after the other: first the leading
    // consonant run is stripped, then the trailing vowel run.
    $core = preg_replace(
        array('#^' . $consonant . '+#', '#' . $vowel . '+$#'),
        '',
        $str
    );

    preg_match_all('#(' . $vowel . '+' . $consonant . '+)#', $core, $pairs);

    return count($pairs[1]);
}
/**
 * Tells whether a string ends in the same consonant twice (e.g. "tt", "ss").
 *
 * @param   string  $str  The string to examine.
 *
 * @return  boolean  True when the last two characters are both consonants and equal.
 */
private static function _doubleConsonant($str)
{
    $c = self::$_regex_consonant;

    // The string must end in two consonants before they can be compared.
    if (!preg_match('#' . $c . '{2}$#', $str, $matches))
    {
        return false;
    }

    $pair = $matches[0];

    // Square-bracket offsets: the curly-brace form ($s{0}) is deprecated in
    // PHP 7.4 and no longer parses in PHP 8.0.
    return $pair[0] === $pair[1];
}
/**
 * Tells whether a string ends consonant-vowel-consonant, where the final
 * consonant is not "w", "x" or "y" (the "*o" condition of the Porter
 * stemming algorithm, which the rule set in this class follows).
 *
 * @param   string  $str  The string to examine.
 *
 * @return  boolean  True when the ending satisfies the condition.
 */
private static function _cvc($str)
{
    $c = self::$_regex_consonant;
    $v = self::$_regex_vowel;

    if (!preg_match('#(' . $c . $v . $c . ')$#', $str, $matches))
    {
        return false;
    }

    $ending = $matches[1];

    // The length and last-letter tests are part of the returned value. Written
    // as "$result = preg_match(...) and ...", the assignment binds tighter than
    // "and", so only the preg_match() result would be kept and these tests lost.
    // Offsets use square brackets because the curly-brace form is gone in PHP 8.0.
    return strlen($ending) == 3
        && $ending[2] != 'w'
        && $ending[2] != 'x'
        && $ending[2] != 'y';
}