Joomla Platform  13.1
Documentation des API du framework Joomla Platform
 Tout Classes Espaces de nommage Fichiers Fonctions Variables Pages
patterns.php
Aller à la documentation de ce fichier.
1 <?php
2 /**
3 * PCRE regular expressions for UTF-8. Note: this file is not actually used by
4 * the rest of the library, but these regular expressions can be useful to have
5 * available.
6 * @version $Id$
7 * @see http://www.w3.org/International/questions/qa-forms-utf-8
8 * @package utf8
9 * @subpackage patterns
10 */
11 
12 //--------------------------------------------------------------------
13 /**
14 * PCRE pattern to check that a whole string is valid UTF-8.
   * The expression is anchored with ^ and $ around a repeated group, so it
   * matches only when the entire subject is a run of zero or more well-formed
   * sequences (an empty subject therefore matches). The string holds the bare
   * expression only: it has no delimiters or modifiers, so a caller has to add
   * them before handing it to a function such as preg_match().
15 * Comes from W3 FAQ: Multilingual Forms
16 * Note: modified to include full ASCII range including control chars
17 * @see http://www.w3.org/International/questions/qa-forms-utf-8
18 * @package utf8
19 * @subpackage patterns
20 */
21 $UTF8_VALID = '^('.
22  '[\x00-\x7F]'. # 1-byte: ASCII (including control chars)
23  '|[\xC2-\xDF][\x80-\xBF]'. # 2-byte: lead C2-DF, so no overlong C0/C1 forms
24  '|\xE0[\xA0-\xBF][\x80-\xBF]'. # 3-byte, lead E0: second byte A0-BF excludes overlongs
25  '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # 3-byte, leads E1-EC, EE, EF: straight 3-byte
26  '|\xED[\x80-\x9F][\x80-\xBF]'. # 3-byte, lead ED: second byte 80-9F excludes surrogates
27  '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # 4-byte, lead F0: planes 1-3 (second byte starts at 90)
28  '|[\xF1-\xF3][\x80-\xBF]{3}'. # 4-byte, leads F1-F3: planes 4-15
29  '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. # 4-byte, lead F4: plane 16 (second byte stops at 8F)
30  ')*$'; # close the group: zero or more sequences, then end of subject
31 
32 //--------------------------------------------------------------------
33 /**
34 * PCRE pattern to match single UTF-8 characters.
   * Unanchored alternation of the same eight byte-sequence forms used by the
   * validity pattern, except that each alternative is wrapped in its own
   * capture group. Groups are numbered 1 to 8 in the order written below, so
   * the group that participates in a match tells which form was found.
   * The string has no delimiters or modifiers.
   * NOTE(review): listing line 41 is absent from this extract (numbering jumps
   * from 40 to 42), so the variable this expression is assigned to is not
   * visible here - confirm against the original file.
35 * Comes from W3 FAQ: Multilingual Forms
36 * Note: modified to include full ASCII range including control chars
37 * @see http://www.w3.org/International/questions/qa-forms-utf-8
38 * @package utf8
39 * @subpackage patterns
40 */
42  '([\x00-\x7F])'. # group 1: ASCII (including control chars)
43  '|([\xC2-\xDF][\x80-\xBF])'. # group 2: non-overlong 2-byte
44  '|(\xE0[\xA0-\xBF][\x80-\xBF])'. # group 3: 3-byte with lead E0, excluding overlongs
45  '|([\xE1-\xEC\xEE\xEF][\x80-\xBF]{2})'. # group 4: straight 3-byte
46  '|(\xED[\x80-\x9F][\x80-\xBF])'. # group 5: 3-byte with lead ED, excluding surrogates
47  '|(\xF0[\x90-\xBF][\x80-\xBF]{2})'. # group 6: 4-byte with lead F0, planes 1-3
48  '|([\xF1-\xF3][\x80-\xBF]{3})'. # group 7: 4-byte, planes 4-15
49  '|(\xF4[\x80-\x8F][\x80-\xBF]{2})'; # group 8: 4-byte with lead F4, plane 16
50 
51 //--------------------------------------------------------------------
52 /**
53 * PCRE pattern to locate bad bytes in a UTF-8 string.
   * Group 1 wraps the whole alternation. The first eight alternatives are the
   * well-formed sequences and are not captured individually. The last
   * alternative, group 2, matches any one remaining character. Because the
   * alternatives are tried in the order written, group 2 only participates
   * when none of the well-formed forms matches at that position, which is how
   * a bad byte is singled out.
   * The string has no delimiters or modifiers.
   * NOTE(review): listing line 60 is absent from this extract (numbering jumps
   * from 59 to 61), so the variable this expression is assigned to is not
   * visible here - confirm against the original file.
54 * Comes from W3 FAQ: Multilingual Forms
55 * Note: modified to include full ASCII range including control chars
56 * @see http://www.w3.org/International/questions/qa-forms-utf-8
57 * @package utf8
58 * @subpackage patterns
59 */
61  '([\x00-\x7F]'. # opens group 1; ASCII (including control chars)
62  '|[\xC2-\xDF][\x80-\xBF]'. # non-overlong 2-byte
63  '|\xE0[\xA0-\xBF][\x80-\xBF]'. # 3-byte with lead E0, excluding overlongs
64  '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # straight 3-byte
65  '|\xED[\x80-\x9F][\x80-\xBF]'. # 3-byte with lead ED, excluding surrogates
66  '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # 4-byte with lead F0, planes 1-3
67  '|[\xF1-\xF3][\x80-\xBF]{3}'. # 4-byte, planes 4-15
68  '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. # 4-byte with lead F4, plane 16
69  '|(.{1}))'; # group 2: one leftover character, i.e. an invalid byte; then closes group 1