// Upper bound computed from the base offset plus the product of the three
// per-position counts (_lcount * _vcount * _tcount).
96 $this->slast = $this->_sbase + $this->_lcount * $this->_vcount *
$this->_tcount;
// Load the serialized NP lookup data from npdata.ser in this file's directory.
// file_get_contents() is used when the runtime provides it.
97 if (function_exists(
'file_get_contents')) {
98 $this->NP = unserialize(file_get_contents(dirname(__FILE__).
'/npdata.ser'));
// Fallback path: read the file as an array of lines via file() and join them
// back into a single string before unserializing.
// NOTE(review): unserialize() is only safe while npdata.ser is a trusted file
// shipped alongside this source - confirm it cannot be replaced by users.
100 $this->NP = unserialize(join(
'', file(dirname(__FILE__).
'/npdata.ser')));
// Options are only processed when the caller passed an array.
103 if (is_array($options)) {
// A scalar option name is wrapped into a one-element map (name => value) so the
// loop below treats a single option and an array of options the same way.
127 if (!is_array($option)) {
128 $option = array($option => $value);
130 foreach ($option as $k => $v) {
// Store the requested API encoding.
137 $this->_api_encoding = $v;
// Report a value ($v) that is not accepted for the option name ($k).
140 $this->
_error(
'Set Parameter: Unknown parameter '.$v.
' for option '.$k);
// Normalise any truthy/falsy value to a strict boolean flag.
145 $this->_allow_overlong = ($v) ?
true :
false;
148 $this->_strict_mode = ($v) ?
true :
false;
// Report an option name that is not recognised.
151 $this->
_error(
'Set Parameter: Unknown option '.$k);
// Decodes $input. $one_time_encoding, when truthy, overrides the instance's
// _api_encoding for this call only (it is consulted again in the output switch).
165 function decode($input, $one_time_encoding =
false)
// Validate the per-call encoding override, if one was given.
168 if ($one_time_encoding) {
169 switch ($one_time_encoding) {
175 $this->
_error(
'Unknown encoding '.$one_time_encoding);
// Strip surrounding whitespace before inspecting the input.
180 $input = trim($input);
// Branch 1: input containing '@' is handled as local-part '@' domain.
// NOTE(review): strpos() returns 0 (falsy) when '@' is the first character, so
// such input does not take this branch - confirm that is intended.
184 if (strpos($input,
'@')) {
// Compound input is rejected when strict mode is enabled.
186 if ($this->_strict_mode) {
187 $this->
_error(
'Only simple domain name parts can be handled in strict mode');
// Split once at the first '@'; the remainder becomes the new $input.
190 list ($email_pref, $input) = explode(
'@', $input, 2);
// Process the part after '@' label by label (labels are separated by '.').
191 $arr = explode(
'.', $input);
192 foreach ($arr as $k => $v) {
// Only labels that start with the punycode prefix are candidates; the prefix
// is regex-quoted and anchored at the start of the label.
193 if (preg_match(
'!^'.preg_quote($this->_punycode_prefix,
'!').
'!', $v)) {
// Replace the label only when a truthy conversion result exists
// ($conv is assigned on a line not visible here).
195 if ($conv) $arr[$k] = $conv;
198 $input = join(
'.', $arr);
// Apply the same per-label treatment to the part before '@'.
199 $arr = explode(
'.', $email_pref);
200 foreach ($arr as $k => $v) {
201 if (preg_match(
'!^'.preg_quote($this->_punycode_prefix,
'!').
'!', $v)) {
203 if ($conv) $arr[$k] = $conv;
206 $email_pref = join(
'.', $arr);
// Reassemble both halves around '@'.
207 $return = $email_pref .
'@' . $input;
// Branch 2: input containing ':', '.' or '/' is handled via parse_url().
208 } elseif (preg_match(
'![:\./]!', $input)) {
210 if ($this->_strict_mode) {
211 $this->
_error(
'Only simple domain name parts can be handled in strict mode');
214 $parsed = parse_url($input);
// With a host component present, only the host labels are converted.
215 if (isset($parsed[
'host'])) {
216 $arr = explode(
'.', $parsed[
'host']);
217 foreach ($arr as $k => $v) {
219 if ($conv) $arr[$k] = $conv;
221 $parsed[
'host'] = join(
'.', $arr);
// Rebuild the URL from its parsed components, re-adding the separators that
// parse_url() strips: ':' after a mailto scheme or '://' after any other
// scheme, ':' before the password, '@' after the credentials, ':' before the
// port, '?' before the query and '#' before the fragment. Empty components
// contribute nothing.
223 (empty($parsed[
'scheme']) ?
'' : $parsed[
'scheme'].(strtolower($parsed[
'scheme']) ==
'mailto' ?
':' :
'://'))
224 .(empty($parsed[
'user']) ?
'' : $parsed[
'user'].(empty($parsed[
'pass']) ?
'' :
':'.$parsed[
'pass']).
'@')
226 .(empty($parsed[
'port']) ?
'' :
':'.$parsed[
'port'])
227 .(empty($parsed[
'path']) ?
'' : $parsed[
'path'])
228 .(empty($parsed[
'query']) ?
'' :
'?'.$parsed[
'query'])
229 .(empty($parsed[
'fragment']) ?
'' :
'#'.$parsed[
'fragment']);
// No host component: treat the whole input as dot-separated labels and keep
// the original label whenever the conversion result is falsy.
231 $arr = explode(
'.', $input);
232 foreach ($arr as $k => $v) {
234 $arr[$k] = ($conv) ? $conv : $v;
236 $return = join(
'.', $arr);
// Branch 3: plain input is passed to _decode() directly; a falsy result falls
// back to the unchanged input.
239 $return = $this->
_decode($input);
240 if (!$return) $return = $input;
// Choose the output format: per-call override first, instance default otherwise.
244 switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
255 $this->
_error(
'Unsupported output format');
// Encodes $decoded. $one_time_encoding, when truthy, overrides the instance's
// _api_encoding for this call only.
267 function encode($decoded, $one_time_encoding =
false)
// Select the input format: per-call override first, instance default otherwise.
271 switch ($one_time_encoding ? $one_time_encoding : $this->_api_encoding) {
280 $this->
_error(
'Unsupported input format: '.($one_time_encoding ? $one_time_encoding : $this->_api_encoding));
// Nothing to encode: return an empty string.
285 if (empty($decoded))
return '';
// Walk the input elements; $last_begin marks where the current segment starts.
291 foreach ($decoded as $k => $v) {
// Strict mode rejects input that looks like an e-mail address or URL.
306 if ($this->_strict_mode) {
307 $this->
_error(
'Neither email addresses nor URLs are allowed in strict mode.');
// Encode the segment collected since $last_begin, up to but excluding index $k.
313 $encoded = $this->
_encode(array_slice($decoded, $last_begin, (($k)-$last_begin)));
// Alternative path: append the same segment converted with _ucs4_to_utf8()
// (the condition selecting between the two paths is not visible here).
317 $output .= $this->
_ucs4_to_utf8(array_slice($decoded, $last_begin, (($k)-$last_begin)));
// Append the current element itself as a single byte.
319 $output .= chr($decoded[$k]);
// The next segment starts right after the current position.
321 $last_begin = $k + 1;
// Handle the trailing segment from $last_begin to the end of the input the
// same way as the segments inside the loop.
327 $inp_len =
sizeof($decoded);
329 $encoded = $this->
_encode(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
333 $output .= $this->
_ucs4_to_utf8(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
// Assignment inside the condition: the branch is taken only when _encode()
// returns a truthy value for the whole input.
337 if ($output = $this->
_encode($decoded)) {
// The input must start with the punycode prefix (regex-quoted, anchored at the
// start of the string).
363 if (!preg_match(
'!^'.preg_quote($this->_punycode_prefix,
'!').
'!', $encoded)) {
364 $this->
_error(
'This is not a punycode string');
// Strip the prefix so the remainder can be tested for emptiness.
367 $encode_test = preg_replace(
'!^'.preg_quote($this->_punycode_prefix,
'!').
'!',
'', $encoded);
370 $this->
_error(
'The given encoded string was empty');
// Position of the last '-' in the string.
374 $delim_pos = strrpos($encoded,
'-');
// When that '-' lies beyond the prefix, the bytes between the prefix and the
// '-' are copied to the output as their ordinal values.
// NOTE(review): the {} string offset syntax was deprecated in PHP 7.4 and
// removed in PHP 8.0; [] is the supported form.
375 if ($delim_pos > strlen($this->_punycode_prefix)) {
376 for ($k = strlen($this->_punycode_prefix); $k < $delim_pos; ++$k) {
377 $decoded[] = ord($encoded{$k});
382 $deco_len = count($decoded);
383 $enco_len = strlen($encoded);
// Main loop: starts right after the delimiter (or at 0 when $delim_pos is
// falsy) and grows the output by one element per iteration (++$deco_len).
391 for ($enco_idx = ($delim_pos) ? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
// Inner loop decodes one variable-length integer into $idx: $w is the running
// weight and $k advances in steps of _base. The loop has no exit condition of
// its own (1) and leaves via the break below.
392 for ($old_idx = $idx, $w = 1, $k = $this->_base; 1 ; $k +=
$this->_base) {
// Threshold: ($k - $bias) clamped to the range [_tmin, _tmax].
395 $t = ($k <= $bias) ? $this->_tmin :
396 (($k >= $bias + $this->_tmax) ? $this->_tmax : ($k - $bias));
// A digit below the threshold terminates the current integer.
397 if ($digit < $t)
break;
398 $w = (int) ($w * ($this->_base - $t));
// Recompute the bias from the amount $idx advanced in this round.
400 $bias = $this->
_adapt($idx - $old_idx, $deco_len + 1, $is_first);
// Split $idx: the quotient advances the code point value, the remainder is the
// insertion position within the output.
402 $char += (int) ($idx / ($deco_len + 1));
403 $idx %= ($deco_len + 1);
// Shift the tail one slot to the right to make room at position $idx.
406 for ($i = $deco_len; $i > $idx; $i--) {
407 $decoded[$i] = $decoded[($i - 1)];
// Insert the code point and move the insertion index past it.
410 $decoded[$idx++] = $char;
// Compare the first strlen(prefix) input elements against $check_pref (built on
// a line not visible here) to detect input that is already encoded.
422 $extract = strlen($this->_punycode_prefix);
424 $check_deco = array_slice($decoded, 0, $extract);
426 if ($check_pref == $check_deco) {
427 $this->
_error(
'This is already a punycode string');
432 foreach ($decoded as $k => $v) {
439 $this->
_error(
'The given string does not contain encodable chars');
// Reject falsy or non-array input, and arrays without elements.
445 if (!$decoded || !is_array($decoded))
return false;
447 $deco_len = count($decoded);
448 if (!$deco_len)
return false;
// First pass: copy characters accepted by the range test below straight to
// the output.
454 for ($i = 0; $i < $deco_len; ++$i) {
455 $test = $decoded[$i];
// Accepted ranges as written: 0x30-0x3F, 0x41-0x5A, 0x61-0x7B, and 0x2D ('-').
// NOTE(review): 0x30-0x3F also admits ':' through '?' (0x3A-0x3F), and
// `<= 0x7B` admits '{' (0x7B) although 'z' is 0x7A - confirm whether the
// bounds were meant to be digits and letters only.
457 if ((0x2F < $test && $test < 0x40) || (0x40 < $test && $test < 0x5B)
458 || (0x60 < $test && $test <= 0x7B) || (0x2D == $test)) {
459 $encoded .= chr($decoded[$i]);
// Every element was copied verbatim: return the result without the prefix.
463 if ($codecount == $deco_len)
return $encoded;
// Otherwise the result starts with the punycode prefix.
466 $encoded = $this->_punycode_prefix.$encoded;
// The '-' delimiter is only appended when at least one element was copied.
469 if ($codecount) $encoded .=
'-';
// Main loop: runs until every input element has been accounted for.
476 while ($codecount < $deco_len) {
// Find the smallest element that is >= $cur_code, starting from _max_ucs.
479 for ($i = 0, $next_code = $this->_max_ucs; $i < $deco_len; $i++) {
480 if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
481 $next_code = $decoded[$i];
// Advance delta by the jump in code value times (handled count + 1).
485 $delta += ($next_code - $cur_code) * ($codecount + 1);
486 $cur_code = $next_code;
// Scan the input in order; elements equal to $cur_code emit the current delta.
489 for ($i = 0; $i < $deco_len; $i++) {
490 if ($decoded[$i] < $cur_code) {
492 } elseif ($decoded[$i] == $cur_code) {
// Emit $delta as a variable-length integer: $q is what remains to be written
// and $k advances in steps of _base.
493 for ($q = $delta, $k = $this->_base; 1; $k +=
$this->_base) {
// Threshold: ($k - $bias) clamped to the range [_tmin, _tmax].
494 $t = ($k <= $bias) ? $this->_tmin :
495 (($k >= $bias + $this->_tmax) ? $this->_tmax : $k - $bias);
// Write one digit, then carry the quotient into the next round.
497 $encoded .= $this->
_encode_digit(intval($t + (($q - $t) % ($this->_base - $t))));
498 $q = (int) (($q - $t) / ($this->_base - $t));
// Recompute the bias after each emitted delta.
501 $bias = $this->
_adapt($delta, $codecount+1, $is_first);
// Bias adaptation used by the punycode encoder and decoder.
// $delta    the delta just processed
// $npoints  divisor for the proportional increase of $delta below
// $is_first truthy selects division by _damp instead of by 2
// Returns the new bias as an integer.
517 function _adapt($delta, $npoints, $is_first)
// Scale the delta down: by _damp when $is_first is truthy, otherwise halve it.
519 $delta = intval($is_first ? ($delta / $this->_damp) : ($delta / 2));
// Increase the delta by its integer quotient with $npoints.
520 $delta += intval($delta / $npoints);
// Reduce the delta by a factor of (_base - _tmin); the surrounding loop and
// the update of $k are on lines not visible here.
522 $delta = intval($delta / ($this->_base - $this->_tmin));
524 return intval($k + ($this->_base - $this->_tmin + 1) * $delta / ($delta + $this->_skew));
// Digit value -> character. ($d < 26) evaluates to 1 or 0 in the arithmetic, so
// 0..25 map to chr($d + 97) = 'a'..'z' and 26..35 map to chr($d + 22) = '0'..'9'.
533 return chr($d + 22 + 75 * ($d < 26));
// Character code -> digit value: '0'..'9' give 26..35, 'A'..'Z' and 'a'..'z'
// give 0..25; anything else yields _base (presumably the "invalid digit" marker).
// NOTE(review): only upper bounds are tested, so any $cp below 58 satisfies the
// first test (e.g. '-' = 45 yields 23) - confirm callers pre-validate input.
543 return ($cp - 48 < 10) ? $cp - 22 : (($cp - 65 < 26) ? $cp - 65 : (($cp - 97 < 26) ? $cp - 97 :
$this->_base));
// First pass over the input code points: drop, reject or map each one.
570 foreach ($input as $v) {
// Code points listed in NP['map_nothing'] are dropped from the output.
572 if (in_array($v, $this->NP[
'map_nothing']))
continue;
// Code points listed individually as prohibited are reported as errors.
575 if (in_array($v, $this->NP[
'prohibit']) || in_array($v, $this->NP[
'general_prohibited'])) {
576 $this->
_error(
'NAMEPREP: Prohibited input U+'.sprintf(
'%08X', $v));
// Prohibited ranges are [low, high] pairs, inclusive on both ends.
579 foreach ($this->NP[
'prohibit_ranges'] as $range) {
580 if ($range[0] <= $v && $v <= $range[1]) {
581 $this->
_error(
'NAMEPREP: Prohibited input U+'.sprintf(
'%08X', $v));
// 0xAC00..0xD7AF (the Unicode Hangul Syllables block) gets special handling;
// $out is produced on a line not visible here.
587 if (0xAC00 <= $v && $v <= 0xD7AF) {
589 $output[] = (int) $out;
// Code points with an entry in NP['replacemaps'] are substituted.
592 } elseif (isset($this->NP[
'replacemaps'][$v])) {
594 $output[] = (int) $out;
// Everything else is passed through unchanged.
597 $output[] = (int) $v;
// Second pass: walk the mapped output and combine sequences.
607 $out_len = count($output);
608 for ($i = 0; $i < $out_len; ++$i) {
// Taken when the current class is non-zero and either the previous class was
// zero or it was greater than the current one.
610 if ((!$last_class || $last_class > $class) && $class) {
// Try to combine everything from the last starter up to (excluding) index $i.
612 $seq_len = $i - $last_starter;
613 $out = $this->
_combine(array_slice($output, $last_starter, $seq_len));
617 $output[$last_starter] = $out;
// When the combined result differs in length, close the gap by shifting the
// following elements one slot to the left.
// NOTE(review): after the shift the stale duplicate sits at index
// $out_len - 1, but unset() targets $out_len - confirm against the lines not
// visible here (e.g. whether $out_len is decremented first).
618 if (count($out) != $seq_len) {
619 for ($j = $i+1; $j < $out_len; ++$j) {
620 $output[$j-1] = $output[$j];
622 unset($output[$out_len]);
// A class of zero marks the current index as the new starter position.
632 if (!$class) $last_starter = $i;
633 $last_class = $class;
// Offset of the character from _sbase; values outside [0, _scount) take the
// branch below (its body is not visible here).
647 $sindex = (int) $char - $this->_sbase;
648 if ($sindex < 0 || $sindex >= $this->_scount) {
// First component: _lbase plus the index divided by _ncount.
// NOTE(review): the (int) cast binds tighter than '+' and '/', so it applies
// only to _lbase; the division can leave a float in $result - confirm whether
// the whole expression was meant to be cast.
652 $result[] = (int) $this->_lbase + $sindex / $this->_ncount;
// Second component: _vbase plus (index modulo _ncount) divided by _tcount; the
// same cast-precedence note applies.
653 $result[] = (int) $this->_vbase + ($sindex % $this->_ncount) /
$this->_tcount;
// Third component: only appended when it differs from _tbase.
654 $T = intval($this->_tbase + $sindex % $this->_tcount);
655 if ($T != $this->_tbase) $result[] = $T;
// Empty input yields an empty array.
667 $inp_len = count($input);
668 if (!$inp_len)
return array();
// $last holds the most recently emitted element, seeded with the first input.
670 $last = (int) $input[0];
// Walk the remaining elements and try to merge each one into $last.
673 for ($i = 1; $i < $inp_len; ++$i) {
674 $char = (int) $input[$i];
// Case: $last has an index inside [0, _scount) that is a multiple of _tcount
// (the rest of this condition is on a line not visible here).
680 if (0 <= $sindex && $sindex < $this->_scount && ($sindex % $this->_tcount == 0)
// Overwrite the last result element with the merged value.
684 $result[(count($result) - 1)] = $last;
// Case: $lindex and $vindex both fall inside their valid ranges.
688 if (0 <= $lindex && $lindex < $this->_lcount && 0 <= $vindex && $vindex < $this->_vcount) {
// Combined value: _sbase + (lindex * _vcount + vindex) * _tcount.
// NOTE(review): the (int) cast applies to _sbase only, not to the whole sum.
690 $last = (int) $this->_sbase + ($lindex * $this->_vcount + $vindex) *
$this->_tcount;
691 $result[(count($result) - 1)] = $last;
// Look up $char in NP['norm_combcls']; characters without an entry yield 0.
709 return isset($this->NP[
'norm_combcls'][$char]) ? $this->NP[
'norm_combcls'][$char] : 0;
721 $size = count($input);
// Visit each adjacent pair; the loop stops one short of the end.
725 for ($i = 0; $i < $size-1; ++$i) {
// Out of order: $next is non-zero and smaller than $last.
727 if ($next != 0 && $last > $next) {
// Walk backwards from position $i + 1, swapping neighbouring elements
// ($t keeps the element being moved; the rest of the swap is not visible here).
729 for ($j = $i + 1; $j > 0; --$j) {
731 $t = intval($input[$j]);
732 $input[$j] = intval($input[$j-1]);
// Reverse lookup in NP['replacemaps']: find the source key whose target
// sequence matches $input.
753 $inp_len = count($input);
754 foreach ($this->NP[
'replacemaps'] as $np_src => $np_target) {
// Cheap rejections first: different first element, then different length.
755 if ($np_target[0] != $input[0])
continue;
756 if (count($np_target) != $inp_len)
continue;
// Compare element by element ($hit is maintained on lines not visible here).
758 foreach ($input as $k2 => $v2) {
759 if ($v2 == $np_target[$k2]) {
// A match returns the source key of the map entry.
766 if ($hit)
return $np_src;
// Byte-wise scan of the input string.
792 $inp_len = strlen($input);
795 for ($k = 0; $k < $inp_len; ++$k) {
// NOTE(review): the {} string offset syntax was deprecated in PHP 7.4 and
// removed in PHP 8.0; [] is the supported form.
796 $v = ord($input{$k});
799 $output[$out_len] = $v;
// Being in 'add' mode at this point is reported as malformed input.
801 if (
'add' == $mode) {
802 $this->
_error(
'Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
// 'next' mode: the current byte is classified by its high bits and its payload
// bits are shifted to their final position.
807 if (
'next' == $mode) {
// Lead-byte payload shifts: 6, 12, 18, 24 and 30 bits for the successive forms.
813 $v = ($v - 192) << 6;
// High nibble 1110 (0xE0-0xEF).
814 } elseif ($v >> 4 == 14) {
816 $v = ($v - 224) << 12;
// High bits 11110 (0xF0-0xF7).
817 } elseif ($v >> 3 == 30) {
819 $v = ($v - 240) << 18;
// High bits 111110 (0xF8-0xFB).
820 } elseif ($v >> 2 == 62) {
822 $v = ($v - 248) << 24;
// High bits 1111110 (0xFC-0xFD).
// NOTE(review): a shift by 30 bits can exceed the positive range of a 32-bit
// PHP integer - confirm the supported platforms.
823 } elseif ($v >> 1 == 126) {
825 $v = ($v - 252) << 30;
827 $this->
_error(
'This might be UTF-8, but I don\'t understand it at byte '.$k);
// Store the shifted lead-byte value as the start of a new output element.
830 if (
'add' == $mode) {
831 $output[$out_len] = (int) $v;
// 'add' mode: fold the current byte into the element being built.
836 if (
'add' == $mode) {
// Unless overlong forms are allowed, range-check this byte against the start
// byte: below 0xA0 after 0xE0, below 0x90 after 0xF0, or above 0x8F after 0xF4
// is rejected.
837 if (!$this->_allow_overlong && $test ==
'range') {
839 if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
840 $this->
_error(
'Bogus UTF-8 character detected (out of legal range) at byte '.$k);
// Strip the 0x80 marker and place the remaining bits according to how many
// bytes are still outstanding, then add them to the current element.
845 $v = ($v - 128) << ($next_byte * 6);
846 $output[($out_len - 1)] += $v;
849 $this->
_error(
'Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
// Taken once the outstanding-byte counter has dropped below zero.
852 if ($next_byte < 0) {
// Serialise each code point to its UTF-8 byte sequence, choosing the length by
// magnitude. The branch for the smallest values is on lines not visible here.
869 foreach ($input as $v) {
// Below 2^11: two bytes (lead 0xC0 plus one continuation byte).
874 } elseif ($v < (1 << 11)) {
875 $output .= chr(192 + ($v >> 6)) . chr(128 + ($v & 63));
// Below 2^16: three bytes (lead 0xE0).
876 } elseif ($v < (1 << 16)) {
877 $output .= chr(224 + ($v >> 12)) . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63));
// Below 2^21: four bytes (lead 0xF0).
878 } elseif ($v < (1 << 21)) {
879 $output .= chr(240 + ($v >> 18)) . chr(128 + (($v >> 12) & 63))
880 . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63));
// Below 2^26: five bytes (lead 0xF8).
881 } elseif ($v < (1 << 26)) {
882 $output .= chr(248 + ($v >> 24)) . chr(128 + (($v >> 18) & 63))
883 . chr(128 + (($v >> 12) & 63)) . chr(128 + (($v >> 6) & 63))
884 . chr(128 + ($v & 63));
// Below 2^31: six bytes (lead 0xFC).
// NOTE(review): (1 << 31) does not fit a positive 32-bit PHP integer - confirm
// the supported platforms.
885 } elseif ($v < (1 << 31)) {
886 $output .= chr(252 + ($v >> 30)) . chr(128 + (($v >> 24) & 63))
887 . chr(128 + (($v >> 18) & 63)) . chr(128 + (($v >> 12) & 63))
888 . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63));
// NOTE(review): $k is not defined by the visible loop (foreach ($input as $v)),
// so the position in this message may be undefined - confirm against the lines
// not visible here.
890 $this->
_error(
'Conversion from UCS-4 to UTF-8 failed: malformed input at byte '.$k);
// Append every value as four bytes, most significant byte first.
907 foreach ($input as $v) {
908 $output .= chr(($v >> 24) & 255).chr(($v >> 16) & 255).chr(($v >> 8) & 255).chr($v & 255);
921 $inp_len = strlen($input);
// Reported when the length check fails (the check itself is not visible here).
924 $this->
_error(
'Input UCS4 string is broken');
// Empty input: return the output as it stands.
928 if (!$inp_len)
return $output;
// $out_len starts at -1; the increment happens on a line not visible here.
929 for ($i = 0, $out_len = -1; $i < $inp_len; ++$i) {
// Start a new output element at zero.
933 $output[$out_len] = 0;
// Accumulate byte $i at its position within the 4-byte group, most significant
// byte first: shifts of 24, 16, 8 and 0 bits for $i % 4 = 0..3.
// NOTE(review): the {} string offset syntax was deprecated in PHP 7.4 and
// removed in PHP 8.0; [] is the supported form.
935 $output[$out_len] += ord($input{$i}) << (8 * (3 - ($i % 4) ) );
// Delegate to the wrapped converter instance ($this->IC) and pass its result
// straight back to the caller.
965 return $this->IC->set_parameters($option, $param);