<?php
// Shared manual include. manual_setup() and contributors() are not defined in
// this file; presumably they come from this include - confirm.
include_once $_SERVER['DOCUMENT_ROOT'] . '/include/shared-manual.inc';

// Start the navigation containers empty before pulling in the book-level TOC
// file (presumably that include fills them in - verify against the include).
$TOC = [];
$TOC_DEPRECATED = [];
$PARENTS = [];
include_once __DIR__ . '/toc/book.parle.inc';

// Page metadata handed to manual_setup() and contributors() below.
$setup = [
    'home' => ['index.php', 'PHP Manual'],
    'head' => ['UTF-8', 'en'],
    'this' => [
        'parle.pattern.matching.php',
        'Pattern matching',
        'Parle pattern matching',
    ],
    'up' => ['book.parle.php', 'Parle'],
    'prev' => ['parle.constants.php', 'Predefined Constants'],
    'next' => ['parle.examples.php', 'Examples'],
    'alternatives' => [],
    'source' => [
        'lang' => 'en',
        'path' => 'reference/parle/pattern.matching.xml',
    ],
    'history' => [],
    // Navigation data as it stands after the TOC include above.
    'toc' => $TOC,
    'toc_deprecated' => $TOC_DEPRECATED,
    'parents' => $PARENTS,
];

manual_setup($setup);

contributors($setup);

?>
<div id="parle.pattern.matching" class="chapter">
 <h1 class="title">Parle pattern matching</h1>

 
 <p class="para">
  Parle supports regex matching similar to flex.
  Also supported are the following POSIX character sets:
  <span class="simplelist"><code class="literal">[:alnum:]</code>, <code class="literal">[:alpha:]</code>, <code class="literal">[:blank:]</code>, <code class="literal">[:cntrl:]</code>, <code class="literal">[:digit:]</code>, <code class="literal">[:graph:]</code>, <code class="literal">[:lower:]</code>, <code class="literal">[:print:]</code>, <code class="literal">[:punct:]</code>, <code class="literal">[:space:]</code>, <code class="literal">[:upper:]</code>, <code class="literal">[:xdigit:]</code></span>.
 </p>
 <p class="para">
  The Unicode character classes are currently not enabled by default; pass --enable-parle-utf32 at compile time to make them available.
  A particular encoding can be mapped with a correctly constructed regex.
  For example, to match the EURO symbol encoded in UTF-8, the regular expression <code class="literal">[\xe2][\x82][\xac]</code> can be used.
  The pattern for a UTF-8 encoded string could be <code class="literal">[ -\x7f]{+}[\x80-\xbf]{+}[\xc2-\xdf]{+}[\xe0-\xef]{+}[\xf0-\xff]+</code>.
 </p>

 <div id="parle.regex.chars" class="section">
  <h2 class="title">Character representations</h2>
  <p class="para">
   <table class="doctable table">
    <caption><strong>Character representations</strong></caption>
    
     <thead>
      <tr>
       <th>Sequence</th><th>Description</th>
      </tr>

     </thead>

     <tbody class="tbody">
      <tr>
       <td>\a</td><td>Alert (bell).</td>
      </tr>

      <tr>
       <td>\b</td><td>Backspace.</td>
      </tr>

      <tr>
       <td>\e</td><td>ESC character, \x1b.</td>
      </tr>

      <tr>
       <td>\n</td><td>Newline.</td>
      </tr>

      <tr>
       <td>\r</td><td>Carriage return.</td>
      </tr>

      <tr>
       <td>\f</td><td>Form feed, \x0c.</td>
      </tr>

      <tr>
       <td>\t</td><td>Horizontal tab, \x09.</td>
      </tr>

      <tr>
       <td>\v</td><td>Vertical tab, \x0b.</td>
      </tr>

      <tr>
       <td>\oct</td><td>Character specified by a three-digit octal code.</td>
      </tr>

      <tr>
       <td>\xhex</td><td>Character specified by a hex code.</td>
      </tr>

      <tr>
       <td>\cchar</td><td>Named control character.</td>
      </tr>

     </tbody>
    
   </table>

  </p>
 </div>
 <div id="parle.regex.charclass" class="section">
  <h2 class="title">Character classes</h2>
  <p class="para">
   <table class="doctable table">
    <caption><strong>Character classes</strong></caption>
    
     <thead>
      <tr>
       <th>Sequence</th><th>Description</th>
      </tr>

     </thead>

     <tbody class="tbody">
      <tr>
       <td>[...]</td><td>A single character listed or contained within a listed range. Ranges can be combined with the <code class="literal">{+}</code> and <code class="literal">{-}</code> operators. For example <code class="literal">[a-z]{+}[0-9]</code> is the same as <code class="literal">[0-9a-z]</code> and <code class="literal">[a-z]{-}[aeiou]</code> is the same as <code class="literal">[b-df-hj-np-tv-z]</code>.</td>
      </tr>

      <tr>
       <td>[^...]</td><td>A single character not listed and not contained within a listed range.</td>
      </tr>

      <tr>
       <td>.</td><td>Any character, default <code class="literal">[^\n]</code>.</td>
      </tr>

      <tr>
       <td>\d</td><td>Digit character, <code class="literal">[0-9]</code>.</td>
      </tr>

      <tr>
       <td>\D</td><td>Non-digit character, <code class="literal">[^0-9]</code>.</td>
      </tr>

      <tr>
       <td>\s</td><td>White space character, <code class="literal">[ \t\n\r\f\v]</code>.</td>
      </tr>

      <tr>
       <td>\S</td><td>Non-white space character, <code class="literal">[^ \t\n\r\f\v]</code>.</td>
      </tr>

      <tr>
       <td>\w</td><td>Word character, <code class="literal">[a-zA-Z0-9_]</code>.</td>
      </tr>

      <tr>
       <td>\W</td><td>Non-word character, <code class="literal">[^a-zA-Z0-9_]</code>.</td>
      </tr>

     </tbody>
    
   </table>

  </p>
 </div>
 <div id="parle.regex.unicodecharclass" class="section">
  <h2 class="title">Unicode character classes</h2>
  <p class="para">
   <table class="doctable table">
    <caption><strong>Unicode character classes</strong></caption>
    
     <thead>
      <tr>
       <th>Sequence</th><th>Description</th>
      </tr>

     </thead>

     <tbody class="tbody">
      <tr>
       <td>\p{C}</td><td>Other.</td>
      </tr>

      <tr>
       <td>\p{Cc}</td><td>Other, control.</td>
      </tr>

      <tr>
       <td>\p{Cf}</td><td>Other, format.</td>
      </tr>

      <tr>
       <td>\p{Co}</td><td>Other, private use.</td>
      </tr>

      <tr>
       <td>\p{Cs}</td><td>Other, surrogate.</td>
      </tr>

      <tr>
       <td>\p{L}</td><td>Letter.</td>
      </tr>

      <tr>
       <td>\p{LC}</td><td>Letter, cased.</td>
      </tr>

      <tr>
       <td>\p{Ll}</td><td>Letter, lowercase.</td>
      </tr>

      <tr>
       <td>\p{Lm}</td><td>Letter, modifier.</td>
      </tr>

      <tr>
       <td>\p{Lo}</td><td>Letter, other.</td>
      </tr>

      <tr>
       <td>\p{Lt}</td><td>Letter, titlecase.</td>
      </tr>

      <tr>
       <td>\p{Lu}</td><td>Letter, uppercase.</td>
      </tr>

      <tr>
       <td>\p{M}</td><td>Mark.</td>
      </tr>

      <tr>
       <td>\p{Mc}</td><td>Mark, spacing combining.</td>
      </tr>

      <tr>
       <td>\p{Me}</td><td>Mark, enclosing.</td>
      </tr>

      <tr>
       <td>\p{Mn}</td><td>Mark, nonspacing.</td>
      </tr>

      <tr>
       <td>\p{N}</td><td>Number.</td>
      </tr>

      <tr>
       <td>\p{Nd}</td><td>Number, decimal digit.</td>
      </tr>

      <tr>
       <td>\p{Nl}</td><td>Number, letter.</td>
      </tr>

      <tr>
       <td>\p{No}</td><td>Number, other.</td>
      </tr>

      <tr>
       <td>\p{P}</td><td>Punctuation.</td>
      </tr>

      <tr>
       <td>\p{Pc}</td><td>Punctuation, connector.</td>
      </tr>

      <tr>
       <td>\p{Pd}</td><td>Punctuation, dash.</td>
      </tr>

      <tr>
       <td>\p{Pe}</td><td>Punctuation, close.</td>
      </tr>

      <tr>
       <td>\p{Pf}</td><td>Punctuation, final quote.</td>
      </tr>

      <tr>
       <td>\p{Pi}</td><td>Punctuation, initial quote.</td>
      </tr>

      <tr>
       <td>\p{Po}</td><td>Punctuation, other.</td>
      </tr>

      <tr>
       <td>\p{Ps}</td><td>Punctuation, open.</td>
      </tr>

      <tr>
       <td>\p{S}</td><td>Symbol.</td>
      </tr>

      <tr>
       <td>\p{Sc}</td><td>Symbol, currency.</td>
      </tr>

      <tr>
       <td>\p{Sk}</td><td>Symbol, modifier.</td>
      </tr>

      <tr>
       <td>\p{Sm}</td><td>Symbol, math.</td>
      </tr>

      <tr>
       <td>\p{So}</td><td>Symbol, other.</td>
      </tr>

      <tr>
       <td>\p{Z}</td><td>Separator.</td>
      </tr>

      <tr>
       <td>\p{Zl}</td><td>Separator, line.</td>
      </tr>

      <tr>
       <td>\p{Zp}</td><td>Separator, paragraph.</td>
      </tr>

      <tr>
       <td>\p{Zs}</td><td>Separator, space.</td>
      </tr>

     </tbody>
    
   </table>

  </p>
  <p class="para">
   These character classes are only available if the option --enable-parle-utf32 was passed at compile time.
  </p>
 </div>
 <div id="parle.regex.alternation" class="section">
  <h2 class="title">Alternation and repetition</h2>
  <p class="para">
   <table class="doctable table">
    <caption><strong>Alternation and repetition</strong></caption>
    
     <thead>
      <tr>
       <th>Sequence</th><th>Greedy</th><th>Description</th>
      </tr>

     </thead>

     <tbody class="tbody">
      <tr>
       <td>...|...</td><td>-</td><td>Try sub-patterns in alternation.</td>
      </tr>

      <tr>
       <td>*</td><td>yes</td><td>Match 0 or more times.</td>
      </tr>

      <tr>
       <td>+</td><td>yes</td><td>Match 1 or more times.</td>
      </tr>

      <tr>
       <td>?</td><td>yes</td><td>Match 0 or 1 times.</td>
      </tr>

      <tr>
       <td>{n}</td><td>no</td><td>Match exactly n times.</td>
      </tr>

      <tr>
       <td>{n,}</td><td>yes</td><td>Match at least n times.</td>
      </tr>

      <tr>
       <td>{n,m}</td><td>yes</td><td>Match at least n times but no more than m times.</td>
      </tr>

      <tr>
       <td>*?</td><td>no</td><td>Match 0 or more times.</td>
      </tr>

      <tr>
       <td>+?</td><td>no</td><td>Match 1 or more times.</td>
      </tr>

      <tr>
       <td>??</td><td>no</td><td>Match 0 or 1 times.</td>
      </tr>

      <tr>
       <td>{n,}?</td><td>no</td><td>Match at least n times.</td>
      </tr>

      <tr>
       <td>{n,m}?</td><td>no</td><td>Match at least n times but no more than m times.</td>
      </tr>

      <tr>
       <td>{MACRO}</td><td>-</td><td>Include the regex MACRO in the current regex.</td>
      </tr>

     </tbody>
    
   </table>

  </p>
 </div>
 <div id="parle.regex.anchors" class="section">
  <h2 class="title">Anchors</h2>
  <p class="para">
   <table class="doctable table">
    <caption><strong>Anchors</strong></caption>
    
     <thead>
      <tr>
       <th>Sequence</th><th>Description</th>
      </tr>

     </thead>

     <tbody class="tbody">
      <tr>
       <td>^</td><td>Start of string or after a newline.</td>
      </tr>

      <tr>
       <td>$</td><td>End of string or before a newline.</td>
      </tr>

     </tbody>
    
   </table>

  </p>
 </div>
 <div id="parle.regex.grouping" class="section">
  <h2 class="title">Grouping</h2>
  <p class="para">
   <table class="doctable table">
    <caption><strong>Grouping</strong></caption>
    
     <thead>
      <tr>
       <th>Sequence</th>
       <th>Description</th>
      </tr>

     </thead>

     <tbody class="tbody">
      <tr>
       <td>(...)</td>
       <td>Group a regular expression to override default operator precedence.</td>
      </tr>

      <tr>
       <td style="vertical-align: top;">(?r-s:pattern)</td>
       <td>
        <span class="simpara">
         Apply option r and omit option s while interpreting pattern.
         Options may be zero or more of the characters i, s, or x.
        </span>
        <span class="simpara">
         <code class="literal">i</code> means case-insensitive.
        </span>
        <span class="simpara">
         <code class="literal">-i</code> means case-sensitive.
        </span>
        <span class="simpara">
         <code class="literal">s</code> alters the meaning of <code class="literal">.</code> to match any character whatsoever.
        </span>
        <span class="simpara">
         <code class="literal">-s</code> alters the meaning of <code class="literal">.</code> to match any character except <code class="literal">\n</code>.
        </span>
        <span class="simpara">
         <code class="literal">x</code> ignores comments and whitespace in patterns.
         Whitespace is ignored unless it is backslash-escaped, enclosed in double quotes (<code class="literal">&quot;...&quot;</code>),
         or appears inside a character range.
        </span>
        <span class="simpara">
         These options can be applied globally at the rules level by passing a combination of the bit flags to the lexer.
        </span>
       </td>
      </tr>

      <tr>
       <td>(?# comment )</td>
       <td>Omit everything within (). The first ) character encountered ends the pattern. It is not possible for the comment to contain a ) character. The comment may span lines.</td>
      </tr>

     </tbody>
    
   </table>

  </p>
 </div>
</div>
<?php
// Pass the $setup metadata built at the top of this file to manual_footer().
// NOTE(review): manual_footer() is defined outside this file; presumably it renders the page footer - confirm.
manual_footer($setup); ?>