Joomla Platform  13.1
Documentation des API du framework Joomla Platform
 Tout Classes Espaces de nommage Fichiers Fonctions Variables Pages
Référence du fichier bad.php

Aller au code source de ce fichier.

Espaces de nommage

namespace  utf8

Fonctions

 utf8_bad_find ($str)
 utf8_bad_findall ($str)
 utf8_bad_strip ($str)
 utf8_bad_replace ($str, $replace= '?')
 utf8_bad_identify ($str, &$i)
 utf8_bad_explain ($code)

Variables

const UTF8_BAD_5OCTET = 1
const UTF8_BAD_6OCTET = 2
const UTF8_BAD_SEQID = 3
const UTF8_BAD_NONSHORT = 4
const UTF8_BAD_SURROGATE = 5
const UTF8_BAD_UNIOUTRANGE = 6
const UTF8_BAD_SEQINCOMPLETE = 7

Documentation des fonctions

utf8_bad_explain (   $code)

Définition à la ligne 384 du fichier bad.php.

Références UTF8_BAD_5OCTET, UTF8_BAD_6OCTET, UTF8_BAD_NONSHORT, UTF8_BAD_SEQID, UTF8_BAD_SEQINCOMPLETE, UTF8_BAD_SURROGATE, et UTF8_BAD_UNIOUTRANGE.

/**
 * Converts one of the UTF8_BAD_* error codes into a human-readable
 * English explanation.
 *
 * @param int $code One of the UTF8_BAD_* constants.
 *
 * @return string|bool The explanation text, or false when the code is not
 *                     recognised (an E_USER_WARNING is raised in that case).
 */
function utf8_bad_explain($code)
{
    switch ($code) {
        case UTF8_BAD_5OCTET:
            return 'Five octet sequences are valid UTF-8 but are not supported by Unicode';

        case UTF8_BAD_6OCTET:
            return 'Six octet sequences are valid UTF-8 but are not supported by Unicode';

        case UTF8_BAD_SEQID:
            return 'Invalid octet for use as start of multi-byte UTF-8 sequence';

        case UTF8_BAD_NONSHORT:
            return 'From Unicode 3.1, non-shortest form is illegal';

        case UTF8_BAD_SURROGATE:
            return 'From Unicode 3.2, surrogate characters are illegal';

        case UTF8_BAD_UNIOUTRANGE:
            return 'Codepoints outside the Unicode range are illegal';

        case UTF8_BAD_SEQINCOMPLETE:
            return 'Incomplete multi-octet sequence';
    }

    // No case matched: report the unknown code and signal failure.
    trigger_error('Unknown error code: '.$code, E_USER_WARNING);
    return false;
}
utf8_bad_find (   $str)

Définition à la ligne 33 du fichier bad.php.

Références $UTF8_BAD.

/**
 * Finds the first byte in a string that is not part of a well-formed
 * UTF-8 sequence.
 *
 * @param string $str String to scan.
 *
 * @return int|bool Byte offset of the first bad byte, or false when every
 *                  byte belongs to a well-formed sequence.
 */
function utf8_bad_find($str)
{
    // Group 1 matches one well-formed UTF-8 character; group 2 only
    // participates when none of the well-formed alternatives fit, i.e. the
    // byte at the current position is invalid.
    $UTF8_BAD =
        '([\x00-\x7F]'.                          // ASCII (including control chars)
        '|[\xC2-\xDF][\x80-\xBF]'.               // non-overlong 2-byte
        '|\xE0[\xA0-\xBF][\x80-\xBF]'.           // excluding overlongs
        '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    // straight 3-byte
        '|\xED[\x80-\x9F][\x80-\xBF]'.           // excluding surrogates
        '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        // planes 1-3
        '|[\xF1-\xF3][\x80-\xBF]{3}'.            // planes 4-15
        '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        // plane 16
        '|(.{1}))';                              // invalid byte

    $pos = 0;

    // Walk the string one match at a time. Matching from an offset avoids
    // re-copying the remainder of the string on every iteration.
    while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
        if (isset($matches[2])) {
            return $pos;
        }
        $pos += strlen($matches[0]);
    }

    return false;
}
utf8_bad_findall (   $str)

Définition à la ligne 70 du fichier bad.php.

Références $UTF8_BAD.

/**
 * Finds every byte in a string that is not part of a well-formed UTF-8
 * sequence.
 *
 * @param string $str String to scan.
 *
 * @return array|bool List of byte offsets of the bad bytes, in ascending
 *                    order, or false when none were found.
 */
function utf8_bad_findall($str)
{
    // Group 1 matches one well-formed UTF-8 character; group 2 only
    // participates when the byte at the current position is invalid.
    $UTF8_BAD =
        '([\x00-\x7F]'.                          // ASCII (including control chars)
        '|[\xC2-\xDF][\x80-\xBF]'.               // non-overlong 2-byte
        '|\xE0[\xA0-\xBF][\x80-\xBF]'.           // excluding overlongs
        '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    // straight 3-byte
        '|\xED[\x80-\x9F][\x80-\xBF]'.           // excluding surrogates
        '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        // planes 1-3
        '|[\xF1-\xF3][\x80-\xBF]{3}'.            // planes 4-15
        '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        // plane 16
        '|(.{1}))';                              // invalid byte

    $pos = 0;
    $badList = array();

    // Matching from an offset avoids re-copying the remainder of the string
    // on every iteration.
    while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
        if (isset($matches[2])) {
            $badList[] = $pos;
        }
        $pos += strlen($matches[0]);
    }

    if (count($badList) > 0) {
        return $badList;
    }

    return false;
}
utf8_bad_identify (   $str,
  & $i 
)

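Scans $str for the first invalid UTF-8 sequence and returns the matching UTF8_BAD_* code; $i is passed by reference and receives the byte index at which the scan stopped. (The text Doxygen captured here is a comment from inside the function body: at the end of a multi-octet sequence, mUcs4 contains the final Unicode codepoint to be output.)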

Définition à la ligne 250 du fichier bad.php.

Références UTF8_BAD_5OCTET, UTF8_BAD_6OCTET, UTF8_BAD_NONSHORT, UTF8_BAD_SEQID, UTF8_BAD_SEQINCOMPLETE, UTF8_BAD_SURROGATE, et UTF8_BAD_UNIOUTRANGE.

/**
 * Reports why a string is not well-formed UTF-8 and where the problem is.
 *
 * The string is scanned byte by byte with a small state machine. Scanning
 * stops at the first problem found.
 *
 * @param string   $str String to scan.
 * @param int|null $i   Output parameter (passed by reference). On return:
 *                      - for an illegal lead byte (5-octet, 6-octet or
 *                        invalid start), the index of that byte;
 *                      - for a non-shortest form, surrogate or out-of-range
 *                        codepoint, the index of the last byte of the
 *                        sequence;
 *                      - for an incomplete sequence, the index of the last
 *                        byte that belonged to the truncated sequence;
 *                      - null when no problem was found.
 *
 * @return int|bool One of the UTF8_BAD_* constants, or false when no bad
 *                  octets were found.
 */
function utf8_bad_identify($str, &$i)
{
    $mState = 0; // expected number of octets still to come in the current sequence
    $mUcs4 = 0;  // Unicode codepoint being assembled
    $mBytes = 1; // total number of octets expected in the current sequence

    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
        // Square-bracket offset: the curly-brace form is not available in PHP 8.
        $in = ord($str[$i]);

        if ($mState === 0) {
            // When mState is zero we expect either a US-ASCII character or
            // the first octet of a multi-octet sequence.
            if (0 === (0x80 & $in)) {
                // US-ASCII, pass straight through.
                $mBytes = 1;
            } elseif (0xC0 === (0xE0 & $in)) {
                // First octet of 2 octet sequence
                $mUcs4 = ($in & 0x1F) << 6;
                $mState = 1;
                $mBytes = 2;
            } elseif (0xE0 === (0xF0 & $in)) {
                // First octet of 3 octet sequence
                $mUcs4 = ($in & 0x0F) << 12;
                $mState = 2;
                $mBytes = 3;
            } elseif (0xF0 === (0xF8 & $in)) {
                // First octet of 4 octet sequence
                $mUcs4 = ($in & 0x07) << 18;
                $mState = 3;
                $mBytes = 4;
            } elseif (0xF8 === (0xFC & $in)) {
                /* First octet of 5 octet sequence.
                 *
                 * This is illegal because the encoded codepoint must be either
                 * (a) not the shortest form or
                 * (b) outside the Unicode range of 0-0x10FFFF.
                 */
                return UTF8_BAD_5OCTET;
            } elseif (0xFC === (0xFE & $in)) {
                // First octet of 6 octet sequence, see comments for 5 octet sequence.
                return UTF8_BAD_6OCTET;
            } else {
                // Current octet is neither in the US-ASCII range nor a legal
                // first octet of a multi-octet sequence.
                return UTF8_BAD_SEQID;
            }
        } else {
            // When mState is non-zero, we expect a continuation of the
            // multi-octet sequence.
            if (0x80 === (0xC0 & $in)) {
                // Legal continuation: merge its six payload bits.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence. mUcs4 now contains the
                // final Unicode codepoint to be output.
                if (0 === --$mState) {
                    if (((2 === $mBytes) && ($mUcs4 < 0x0080)) ||
                        ((3 === $mBytes) && ($mUcs4 < 0x0800)) ||
                        ((4 === $mBytes) && ($mUcs4 < 0x10000))) {
                        // From Unicode 3.1, non-shortest form is illegal
                        return UTF8_BAD_NONSHORT;
                    } elseif (($mUcs4 & 0xFFFFF800) === 0xD800) {
                        // From Unicode 3.2, surrogate characters are illegal
                        return UTF8_BAD_SURROGATE;
                    } elseif ($mUcs4 > 0x10FFFF) {
                        // Codepoints outside the Unicode range are illegal
                        return UTF8_BAD_UNIOUTRANGE;
                    }

                    // Reset the state for the next character.
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // A continuation octet was expected but something else was
                // found: incomplete multi-octet sequence. Step back so $i
                // points into the truncated sequence.
                $i--;
                return UTF8_BAD_SEQINCOMPLETE;
            }
        }
    }

    if ($mState !== 0) {
        // The string ended in the middle of a multi-octet sequence.
        $i--;
        return UTF8_BAD_SEQINCOMPLETE;
    }

    // No bad octets found
    $i = null;
    return false;
}
utf8_bad_replace (   $str,
  $replace = '?' 
)

Définition à la ligne 146 du fichier bad.php.

Références $UTF8_BAD.

/**
 * Returns a copy of a string in which every byte that is not part of a
 * well-formed UTF-8 sequence has been replaced.
 *
 * @param string $str     String to clean.
 * @param string $replace Text substituted for each bad byte (default '?').
 *
 * @return string The string with bad bytes replaced.
 */
function utf8_bad_replace($str, $replace = '?')
{
    // Group 1 matches one well-formed UTF-8 character; group 2 only
    // participates when the byte at the current position is invalid.
    $UTF8_BAD =
        '([\x00-\x7F]'.                          // ASCII (including control chars)
        '|[\xC2-\xDF][\x80-\xBF]'.               // non-overlong 2-byte
        '|\xE0[\xA0-\xBF][\x80-\xBF]'.           // excluding overlongs
        '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    // straight 3-byte
        '|\xED[\x80-\x9F][\x80-\xBF]'.           // excluding surrogates
        '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        // planes 1-3
        '|[\xF1-\xF3][\x80-\xBF]{3}'.            // planes 4-15
        '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        // plane 16
        '|(.{1}))';                              // invalid byte

    // Build the result by concatenation rather than through the output
    // buffer, so active output handlers are never involved.
    $result = '';
    $pos = 0;

    while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
        if (!isset($matches[2])) {
            $result .= $matches[0];
        } else {
            $result .= $replace;
        }
        $pos += strlen($matches[0]);
    }

    return $result;
}
utf8_bad_strip (   $str)

Définition à la ligne 109 du fichier bad.php.

Références $UTF8_BAD.

/**
 * Returns a copy of a string with every byte that is not part of a
 * well-formed UTF-8 sequence removed.
 *
 * @param string $str String to clean.
 *
 * @return string The string without the bad bytes.
 */
function utf8_bad_strip($str)
{
    // Group 1 matches one well-formed UTF-8 character; group 2 only
    // participates when the byte at the current position is invalid.
    $UTF8_BAD =
        '([\x00-\x7F]'.                          // ASCII (including control chars)
        '|[\xC2-\xDF][\x80-\xBF]'.               // non-overlong 2-byte
        '|\xE0[\xA0-\xBF][\x80-\xBF]'.           // excluding overlongs
        '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    // straight 3-byte
        '|\xED[\x80-\x9F][\x80-\xBF]'.           // excluding surrogates
        '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        // planes 1-3
        '|[\xF1-\xF3][\x80-\xBF]{3}'.            // planes 4-15
        '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        // plane 16
        '|(.{1}))';                              // invalid byte

    // Build the result by concatenation rather than through the output
    // buffer, so active output handlers are never involved.
    $result = '';
    $pos = 0;

    while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
        if (!isset($matches[2])) {
            $result .= $matches[0];
        }
        $pos += strlen($matches[0]);
    }

    return $result;
}

Documentation des variables

const UTF8_BAD_5OCTET = 1

Définition à la ligne 180 du fichier bad.php.

Référencé par utf8_bad_explain(), et utf8_bad_identify().

const UTF8_BAD_6OCTET = 2

Définition à la ligne 190 du fichier bad.php.

Référencé par utf8_bad_explain(), et utf8_bad_identify().

const UTF8_BAD_NONSHORT = 4

Définition à la ligne 208 du fichier bad.php.

Référencé par utf8_bad_explain(), et utf8_bad_identify().

const UTF8_BAD_SEQID = 3

Définition à la ligne 199 du fichier bad.php.

Référencé par utf8_bad_explain(), et utf8_bad_identify().

const UTF8_BAD_SEQINCOMPLETE = 7

Définition à la ligne 236 du fichier bad.php.

Référencé par utf8_bad_explain(), et utf8_bad_identify().

const UTF8_BAD_SURROGATE = 5

Définition à la ligne 217 du fichier bad.php.

Référencé par utf8_bad_explain(), et utf8_bad_identify().

const UTF8_BAD_UNIOUTRANGE = 6

Définition à la ligne 226 du fichier bad.php.

Référencé par utf8_bad_explain(), et utf8_bad_identify().