File "HTML5.php"
Full path: /home/argothem/www/organecyberpresse/plugins-dist/safehtml/lib/xemlock/htmlpurifier-html5/library/HTMLPurifier/Lexer/HTML5.php
File size: 2.3 KB
MIME-type: text/x-php
Charset: utf-8
<?php
/**
* Experimental HTML5-compliant parser using masterminds/html5 library.
*/
class HTMLPurifier_Lexer_HTML5 extends HTMLPurifier_Lexer_DOMLex
{
    /**
     * Checks that the masterminds/html5 parser class can be loaded, then
     * delegates to the parent lexer constructor.
     *
     * @throws HTMLPurifier_Exception If \Masterminds\HTML5 is not available.
     * @codeCoverageIgnore
     */
    public function __construct()
    {
        if (!class_exists('\Masterminds\HTML5')) {
            throw new HTMLPurifier_Exception('Cannot instantiate HTML5 lexer. \Masterminds\HTML5 class is not available');
        }
        parent::__construct();
    }

    /**
     * Transforms an HTML string into tokens.
     *
     * The input is normalized, optionally armored against stray '<'
     * characters, wrapped into a full document, parsed with
     * masterminds/html5, and the resulting <body> subtree is handed to
     * tokenizeDOM() to be flattened into tokens.
     *
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[]
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $html = $this->normalize($html, $config, $context);
        $html = $this->armor($html, $config);

        // masterminds/html5 requires <html>, <head> and <body> tags
        $html = $this->wrapHTML($html, $config, $context, false);

        // Parse the document. $doc is a DOMDocument.
        $html5 = new \Masterminds\HTML5(array('disable_html_ns' => true));
        $doc = $html5->loadHTML($html);

        // Only the contents of <body> are tokenized; the wrapper added by
        // wrapHTML() is what makes both lookups below expected to succeed.
        $body = $doc->getElementsByTagName('html')->item(0) // <html>
                    ->getElementsByTagName('body')->item(0); // <body>

        $tokens = array();
        $this->tokenizeDOM($body, $tokens, $config);

        return $tokens;
    }

    /**
     * Attempt to armor stray angled brackets that cannot possibly
     * form tags and thus are probably being used as emoticons
     *
     * Only active when the Core.AggressivelyFixLt directive is enabled;
     * otherwise the input is returned unchanged.
     *
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @return string
     */
    protected function armor($html, HTMLPurifier_Config $config)
    {
        if ($config->get('Core.AggressivelyFixLt')) {
            // Any character that is not a letter, '!' or '/': a '<' followed
            // by such a character cannot open a tag, a comment/doctype or a
            // closing tag.
            $char = '[^a-z!\/]';
            // A comment, either properly closed or running to end of input.
            $comment = "/<!--(.*?)(-->|\z)/is";

            // Comment bodies are passed through callbackArmorCommentEntities
            // before the '<' rewrite and through callbackUndoCommentSubst
            // after it. Both callbacks are defined outside this class
            // (presumably inherited from the parent lexer).
            $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);

            // Replace each stray '<' with its entity. The replacement must be
            // '&lt;' (a literal '<' would rewrite the match to itself and
            // armor nothing). Repeat until stable: in a run such as '<<3' a
            // single pass only escapes the first bracket, because the second
            // one is consumed as the "following character" of the match.
            do {
                $old = $html;
                $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
            } while ($html !== $old);

            $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
        }

        return $html;
    }
}