<?php
// Shared manual include. manual_setup() and contributors() are not defined in
// this file, so they are presumably provided by this include -- verify.
include_once $_SERVER['DOCUMENT_ROOT'] . '/include/shared-manual.inc';

// Reset the navigation tables before loading the chapter TOC include, which
// presumably fills them in (its contents are not visible from this file).
$TOC = $TOC_DEPRECATED = $PARENTS = [];
include_once __DIR__ . '/toc/reference.pcre.pattern.syntax.inc';

// Page metadata handed to manual_setup() and contributors() below.
// The 'home', 'up', 'prev' and 'next' entries look like [target file, title]
// pairs; 'this' carries a third element that repeats the title here.
$setup = [
    'home' => ['index.php', 'PHP Manual'],
    'head' => ['UTF-8', 'it'],
    'this' => [
        'regexp.reference.unicode.php',
        'Unicode character properties',
        'Unicode character properties',
    ],
    'up' => ['reference.pcre.pattern.syntax.php', 'Sintassi delle regex PCRE'],
    'prev' => ['regexp.reference.escape.php', 'Sequenze di escape'],
    'next' => ['regexp.reference.anchors.php', 'I caratteri &quot;^&quot; e &quot;$&quot;'],
    'alternatives' => [],
    'source' => [
        'lang' => 'it',
        'path' => 'reference/pcre/pattern.syntax.xml',
    ],
    'history' => [],
    // Navigation tables as they stand after the TOC include above.
    'toc' => $TOC,
    'toc_deprecated' => $TOC_DEPRECATED,
    'parents' => $PARENTS,
];

manual_setup($setup);
contributors($setup);

?>
<div id="regexp.reference.unicode" class="section">
  <h2 class="title">Unicode character properties</h2>
  <p class="para">
   Since PHP 5.1.0, three
   additional escape sequences to match generic character types are available
   when <em>UTF-8 mode</em> is selected. They are:
  </p>
  <dl>
   
    <dt><em>\p{xx}</em></dt>
    <dd><span class="simpara">a character with the xx property</span></dd>
   
   
    <dt><em>\P{xx}</em></dt>
    <dd><span class="simpara">a character without the xx property</span></dd>
   
   
    <dt><em>\X</em></dt>
    <dd><span class="simpara">an extended Unicode sequence</span></dd>
   
  </dl>
  <p class="para">
   The property names represented by <code class="literal">xx</code> above are limited 
   to the Unicode general category properties. Each character has exactly one 
   such property, specified by a two-letter abbreviation. For compatibility with
   Perl, negation can be specified by including a circumflex between the
   opening brace and the property name. For example, <code class="literal">\p{^Lu}</code> 
   is the same as <code class="literal">\P{Lu}</code>.
  </p>
  <p class="para">
   If only one letter is specified with <code class="literal">\p</code> or 
   <code class="literal">\P</code>, it includes all the properties that start with that
   letter. In this case, in the absence of negation, the braces in the
   escape sequence are optional; these two examples have the same effect:
  </p>
  <div class="informalexample">
   <div class="example-contents">
<div class="cdata"><pre>
\p{L}
\pL
</pre></div>
   </div>

  </div>
  <table class="doctable table">
   <caption><strong>Supported property codes</strong></caption>
   
    <thead>
     <tr>
      <th>Property</th>
      <th>Matches</th>
      <th>Notes</th>
     </tr>

    </thead>

    <tbody class="tbody">
     <tr>
      <td><code class="literal">C</code></td>
      <td>Other</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Cc</code></td>
      <td>Control</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Cf</code></td>
      <td>Format</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Cn</code></td>
      <td>Unassigned</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Co</code></td>
      <td>Private use</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Cs</code></td>
      <td>Surrogate</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">L</code></td>
      <td>Letter</td>
      <td>
       Includes the following properties: <code class="literal">Ll</code>, 
       <code class="literal">Lm</code>, <code class="literal">Lo</code>, <code class="literal">Lt</code> and 
       <code class="literal">Lu</code>.
      </td>
     </tr>

     <tr>
      <td><code class="literal">Ll</code></td>
      <td>Lower case letter</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Lm</code></td>
      <td>Modifier letter</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Lo</code></td>
      <td>Other letter</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Lt</code></td>
      <td>Title case letter</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Lu</code></td>
      <td>Upper case letter</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">M</code></td>
      <td>Mark</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Mc</code></td>
      <td>Spacing mark</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Me</code></td>
      <td>Enclosing mark</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Mn</code></td>
      <td>Non-spacing mark</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">N</code></td>
      <td>Number</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Nd</code></td>
      <td>Decimal number</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Nl</code></td>
      <td>Letter number</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">No</code></td>
      <td>Other number</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">P</code></td>
      <td>Punctuation</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Pc</code></td>
      <td>Connector punctuation</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Pd</code></td>
      <td>Dash punctuation</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Pe</code></td>
      <td>Close punctuation</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Pf</code></td>
      <td>Final punctuation</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Pi</code></td>
      <td>Initial punctuation</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Po</code></td>
      <td>Other punctuation</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Ps</code></td>
      <td>Open punctuation</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">S</code></td>
      <td>Symbol</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Sc</code></td>
      <td>Currency symbol</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Sk</code></td>
      <td>Modifier symbol</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Sm</code></td>
      <td>Mathematical symbol</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">So</code></td>
      <td>Other symbol</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Z</code></td>
      <td>Separator</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Zl</code></td>
      <td>Line separator</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Zp</code></td>
      <td>Paragraph separator</td>
      <td class="empty">&nbsp;</td>
     </tr>

     <tr>
      <td><code class="literal">Zs</code></td>
      <td>Space separator</td>
      <td class="empty">&nbsp;</td>
     </tr>

    </tbody>
   
  </table>

  <p class="para">
   Extended properties such as &quot;Greek&quot; or &quot;InMusicalSymbols&quot; are not
   supported by PCRE.
  </p>
  <p class="para">
   Specifying case-insensitive (caseless) matching does not affect these escape sequences.
   For example, <code class="literal">\p{Lu}</code> always matches only upper case letters.
  </p>
  <p class="para">
   The <code class="literal">\X</code> escape matches any number of Unicode characters 
   that form an extended Unicode sequence. <code class="literal">\X</code> is equivalent 
   to <code class="literal">(?&gt;\PM\pM*)</code>.
  </p>
  <p class="para">
   That is, it matches a character without the &quot;mark&quot; property, followed
   by zero or more characters with the &quot;mark&quot; property, and treats the
   sequence as an atomic group (described in the section on once-only
   subpatterns). Characters with the &quot;mark&quot; property are typically
   accents that affect the preceding character.
  </p>
  <p class="para">
   Matching characters by Unicode property is not fast, because PCRE has
   to search a structure that contains data for over fifteen thousand
   characters. That is why the traditional escape sequences such as 
   <code class="literal">\d</code> and <code class="literal">\w</code> do not use Unicode properties 
   in PCRE.
  </p>
 </div><?php /* Finish the page: pass the $setup metadata built at the top of this file to manual_footer(), which is defined outside this file. */ manual_footer($setup); ?>