File "DOM.php"
Full path: /home/argothem/www/organecyberpresse/plugins/auto/querypath/v3.0.0/lib/querypath/vendor/gravitypdf/querypath/src/DOM.php
File size: 15.41 KB
MIME-type: text/x-php
Charset: utf-8
<?php
namespace QueryPath;
use Countable;
use DOMDocument;
use DOMNode;
use IteratorAggregate;
use Masterminds\HTML5;
use QueryPath\CSS\DOMTraverser;
use QueryPath\Entities;
use SimpleXMLElement;
use SplObjectStorage;
use Traversable;
/**
* Class DOM
*
* @package QueryPath
*
* @property Traversable|array|SplObjectStorage matches
*/
abstract class DOM implements Query, IteratorAggregate, Countable
{
/**
 * The current set of matched nodes.
 *
 * Starts out as an empty array; setMatches() always stores an
 * SplObjectStorage here.
 */
protected $matches = [];
/**
* Default parser flags.
*
* These are flags that will be used if no global or local flags override them.
*
* @since 2.0
*/
public const DEFAULT_PARSER_FLAGS = null;
// Values accepted by the 'escape_xhtml_js_css_sections' option (see $options).
// NOTE(review): '\\1' looks like a back-reference placeholder consumed by code
// outside this class - confirm against the serializer that reads this option.
public const JS_CSS_ESCAPE_CDATA = '\\1';
public const JS_CSS_ESCAPE_CDATA_CCOMMENT = '/* \\1 */';
public const JS_CSS_ESCAPE_CDATA_DOUBLESLASH = '// \\1';
public const JS_CSS_ESCAPE_NONE = '';
// Error levels that are converted into exceptions while parsing.
// 771 = E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING.
protected $errTypes = 771;
/**
 * The base DOMDocument.
 */
protected $document;
/**
 * Default options.
 *
 * The constructor merges these with the global options and the options
 * passed by the caller; the defaults here have the lowest priority.
 */
protected $options = [
'parser_flags' => null,
'omit_xml_declaration' => false,
'replace_entities' => false,
'exception_level' => 771, // E_ERROR | E_USER_ERROR | E_USER_WARNING | E_WARNING
'ignore_parser_warnings' => false,
'escape_xhtml_js_css_sections' => self::JS_CSS_ESCAPE_CDATA_CCOMMENT,
];
/**
 * Constructor.
 *
 * Typically, a new DOMQuery is created by QueryPath::with(), QueryPath::withHTML(),
 * qp(), or htmlqp().
 *
 * The $document argument is resolved in this order:
 * - empty value: a new, empty DOMDocument with no matches;
 * - SplObjectStorage or another DOM instance: its nodes become the matches and
 *   the document is taken from the first node (if there is one);
 * - DOMDocument, DOMNode, HTML5 or SimpleXMLElement: wrapped directly;
 * - array whose first element is a DOMNode: legacy list of nodes;
 * - string containing both '<' and '>': parsed as markup;
 * - anything else: treated as a file name to load.
 *
 * @param mixed $document
 *   A document-like object.
 * @param string|null $selector
 *   A CSS 3 Selector. Non-string values are treated as "no selector".
 * @param array $options
 *   An associative array of options. Local options win over the global
 *   options, which in turn win over the class defaults.
 *
 * @throws Exception
 *   Thrown when $document is an object of an unsupported class.
 * @see qp()
 */
public function __construct($document = null, $selector = null, $options = [])
{
    $selector = is_string($selector) ? trim($selector) : '';
    $this->options = $options + Options::get() + $this->options;

    // Read the flags from the merged options so that globally configured
    // flags are honoured when the caller does not override them locally.
    $parser_flags = $this->options['parser_flags'] ?? self::DEFAULT_PARSER_FLAGS;

    if (! empty($this->options['ignore_parser_warnings'])) {
        // Don't convert parser warnings into exceptions.
        $this->errTypes = 257; // E_ERROR | E_USER_ERROR
    } elseif (isset($this->options['exception_level'])) {
        // Set the error level at which exceptions will be thrown. By default,
        // QueryPath will throw exceptions for
        // E_ERROR | E_USER_ERROR | E_WARNING | E_USER_WARNING.
        $this->errTypes = $this->options['exception_level'];
    }

    if (empty($document)) {
        // Empty: just create an empty QP.
        $this->document = isset($this->options['encoding'])
            ? new DOMDocument('1.0', $this->options['encoding'])
            : new DOMDocument();
        $this->setMatches(new SplObjectStorage());
    } elseif (is_object($document)) {
        if ($document instanceof SplObjectStorage) {
            // This is the most frequent object type.
            $this->matches = $document;
            if ($document->count() !== 0) {
                $first = $this->getFirstMatch();
                if (! empty($first->ownerDocument)) {
                    $this->document = $first->ownerDocument;
                }
            }
        } elseif ($document instanceof self) {
            $this->setMatches($document->get(null, true));
            if ($this->matches->count() > 0) {
                $this->document = $this->getFirstMatch()->ownerDocument;
            }
        } elseif ($document instanceof DOMDocument) {
            $this->document = $document;
            $this->setMatches($document->documentElement);
        } elseif ($document instanceof DOMNode) {
            $this->document = $document->ownerDocument;
            $this->setMatches($document);
        } elseif ($document instanceof HTML5) {
            // NOTE(review): the HTML5 object itself is stored as the document
            // here - confirm that it exposes documentElement.
            $this->document = $document;
            $this->setMatches($document->documentElement);
        } elseif ($document instanceof SimpleXMLElement) {
            $import = dom_import_simplexml($document);
            $this->document = $import->ownerDocument;
            $this->setMatches($import);
        } else {
            throw new Exception('Unsupported class type: ' . get_class($document));
        }
    } elseif (is_array($document)) {
        // Legacy support: a list of DOMNode objects. The null-coalescing
        // lookup avoids an "undefined array key" warning for arrays that
        // have no index 0. Arrays that do not start with a DOMNode are ignored.
        if (($document[0] ?? null) instanceof DOMNode) {
            $found = new SplObjectStorage();
            foreach ($document as $item) {
                $found->attach($item);
            }
            $this->setMatches($found);
            $this->document = $this->getFirstMatch()->ownerDocument;
        }
    } elseif ($this->isXMLish($document)) {
        // $document is a string of markup. Pass the parser flags along so
        // the 'parser_flags' option applies to strings as well as to files.
        $this->document = $this->parseXMLString($document, $parser_flags ?? 0);
        $this->setMatches($this->document->documentElement);
    } else {
        // $document is a filename.
        $context = empty($options['context']) ? null : $options['context'];
        $this->document = $this->parseXMLFile($document, $parser_flags, $context);
        $this->setMatches($this->document->documentElement);
    }

    // Globally set the output option. There may be no document at all (for
    // example when an empty match set was passed in), in which case there is
    // nothing to configure and assigning the property would be an error.
    if ($this->document !== null) {
        $this->document->formatOutput = ($this->options['format_output'] ?? true) !== false;
    }

    // Do a find if the second param was set. We don't call find() because
    // that creates a new DOMQuery.
    if ($selector !== '') {
        $query = new DOMTraverser($this->matches);
        $query->find($selector);
        $this->setMatches($query->matches());
    }
}
/**
 * Parse a string of markup into a DOMDocument.
 *
 * The HTML parser is used unless the 'use_parser' option is 'xml' or the
 * string starts with an XML declaration. The 'convert_to_encoding',
 * 'convert_from_encoding', 'strip_low_ascii' and 'replace_entities' options
 * are applied to the string before it is parsed.
 *
 * @param string $string
 *   The markup to parse.
 * @param int|null $flags
 *   The OR-combined libxml flags handed to the XML parser. Null is treated
 *   as 0. The HTML parser is called without flags.
 *
 * @return DOMDocument
 *   The parsed document.
 * @throws ParseException
 *   Thrown when the parser reports failure, or when a parser message of a
 *   level selected by $this->errTypes is raised.
 */
private function parseXMLString($string, $flags = 0)
{
    // DOMDocument::loadXML() expects an integer; tolerate a null flag set
    // the same way parseXMLFile() does.
    $flags = $flags ?? 0;

    $document = new DOMDocument('1.0');
    $lead = strtolower(substr($string, 0, 5)); // <?xml

    set_error_handler([ParseException::class, 'initializeFromError'], $this->errTypes);
    try {
        if (isset($this->options['convert_to_encoding'])) {
            // Is there another way to do this?
            $from_enc = $this->options['convert_from_encoding'] ?? 'auto';
            $to_enc = $this->options['convert_to_encoding'];
            if (function_exists('mb_convert_encoding')) {
                $string = mb_convert_encoding($string, $to_enc, $from_enc);
            }
        }

        // This is to avoid cases where low ascii digits have slipped into HTML.
        // AFAIK, it should not adversely affect UTF-8 documents.
        if (! empty($this->options['strip_low_ascii'])) {
            $string = filter_var($string, FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_LOW);
        }

        // Allow users to override parser settings.
        $useParser = '';
        if (! empty($this->options['use_parser'])) {
            $useParser = strtolower($this->options['use_parser']);
        }

        if ($useParser === 'html') {
            // If HTML parser is requested, we use it.
            $loaded = $document->loadHTML($string);
        } elseif ($lead === '<?xml' || $useParser === 'xml') {
            // Parse as XML if it looks like XML, or if XML parser is requested.
            if ($this->options['replace_entities']) {
                $string = Entities::replaceAllEntities($string);
            }
            $loaded = $document->loadXML($string, $flags);
        } else {
            // In all other cases, we try the HTML parser.
            $loaded = $document->loadHTML($string);
        }
    } finally {
        // Always remove our handler, whatever kind of throwable escaped.
        restore_error_handler();
    }

    // The loaders return false on failure. That is only reachable when the
    // failure was not already converted into an exception above.
    if ($loaded === false) {
        throw new ParseException('Unknown parser exception.');
    }

    return $document;
}
/**
 * EXPERT: Be very, very careful using this.
 *
 * Replace the current set of matches. The previous set is kept in
 * $this->last first (for end() and andSelf()), and $this->length is
 * refreshed afterwards.
 *
 * NOTE(review): neither `last` nor `length` is declared in this class;
 * presumably a subclass declares them - confirm.
 *
 * @param mixed $matches
 *   An SplObjectStorage is stored as-is. An array is copied into a new
 *   SplObjectStorage (and a notice about legacy usage is triggered). Any
 *   other non-null value becomes the single member of a new storage; null
 *   yields an empty storage.
 *
 * @since 2.0
 */
public function setMatches($matches)
{
    // Remember what was matched before the change.
    $this->last = $this->matches;

    if (! $matches instanceof SplObjectStorage) {
        $storage = new SplObjectStorage();

        if (is_array($matches)) {
            // This is likely legacy code that needs conversion.
            trigger_error('Legacy array detected.');
            foreach ($matches as $node) {
                $storage->attach($node);
            }
        } elseif ($matches !== null) {
            // A single object: wrap it in a one-element set.
            $storage->attach($matches);
        }

        $matches = $storage;
    }

    $this->matches = $matches;

    // EXPERIMENTAL: Support for qp()->length.
    $this->length = $this->matches->count();
}
/**
 * Collect the most deeply nested element(s) below a node.
 *
 * Typically this only needs to be invoked with the first parameter; the
 * rest carry state through the recursion.
 *
 * Only nodes without any child nodes are compared by depth. An element that
 * has child nodes but no element children (for example, text only) is
 * neither descended into nor recorded. When the starting element itself has
 * no child nodes, it ends up in the result twice.
 *
 * @param DOMNode $ele
 *   The element to inspect.
 * @param int $depth
 *   The depth of $ele relative to the starting element.
 * @param mixed $current
 *   The candidates found so far; defaults to a list holding $ele.
 * @param int|null $deepest
 *   By reference: the greatest depth seen so far; defaults to $depth.
 *
 * @return array
 *   Returns an array of DOM nodes.
 * @see deepest();
 */
protected function deepestNode(DOMNode $ele, $depth = 0, $current = null, &$deepest = null)
{
    // FIXME: Should this use SplObjectStorage?
    $current = $current ?? [$ele];
    $deepest = $deepest ?? $depth;

    if (! $ele->hasChildNodes()) {
        if ($depth > $deepest) {
            // A new record: forget the shallower candidates.
            $deepest = $depth;

            return [$ele];
        }
        if ($depth === $deepest) {
            // Ties with the current record: keep both.
            $current[] = $ele;
        }

        return $current;
    }

    foreach ($ele->childNodes as $node) {
        if ($node->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }
        $current = $this->deepestNode($node, $depth + 1, $current, $deepest);
    }

    return $current;
}
/**
 * Prepare an item for insertion into a DOM.
 *
 * This handles a variety of boilerplate tasks that need doing before an
 * indeterminate object can be inserted into a DOM tree.
 * - If item is a string, this is converted into a document fragment and returned.
 * - If item is a DOM query object, then all of its matches are serialized and
 *   re-parsed into a document fragment, which is returned.
 * - If the item is a DOMNode, it is imported into the current DOM if necessary.
 * - If the item is a SimpleXMLElement, it is converted into a DOM node and then
 *   imported.
 *
 * @param mixed $item
 *   Item to prepare for insert.
 *
 * @return mixed
 *   Returns the prepared item, or null when there is nothing to insert
 *   (empty value or a query object without matches).
 * @throws Exception
 *   Thrown if the object passed in is not of a supported object type.
 * @throws ParseException
 *   May be thrown while a string is parsed into a fragment, depending on
 *   $this->errTypes.
 */
protected function prepareInsert($item)
{
    // empty() also treats the string '0' as empty, but '0' is perfectly
    // valid text content, so let it through to the string handling below.
    if (empty($item) && $item !== '0') {
        return null;
    }

    if (is_string($item)) {
        // If configured to do so, replace all entities.
        if ($this->options['replace_entities']) {
            $item = Entities::replaceAllEntities($item);
        }

        $frag = $this->document->createDocumentFragment();
        set_error_handler([ParseException::class, 'initializeFromError'], $this->errTypes);
        try {
            $frag->appendXML($item);
        } finally {
            // Always remove our handler, whatever kind of throwable escaped.
            restore_error_handler();
        }

        return $frag;
    }

    if ($item instanceof self) {
        if ($item->count() === 0) {
            return null;
        }

        $frag = $this->document->createDocumentFragment();
        foreach ($item->matches as $m) {
            $frag->appendXML($item->document->saveXML($m));
        }

        return $frag;
    }

    if ($item instanceof DOMNode) {
        if ($item->ownerDocument !== $this->document) {
            // Deep clone this and attach it to this document
            $item = $this->document->importNode($item, true);
        }

        return $item;
    }

    if ($item instanceof SimpleXMLElement) {
        $element = dom_import_simplexml($item);

        return $this->document->importNode($element, true);
    }

    throw new Exception('Cannot prepare item of unsupported type: ' . gettype($item));
}
/**
 * Return the first item of the current match set.
 *
 * Rewinds the internal iterator of $this->matches as a side effect.
 * Callers are expected to make sure the set is not empty.
 */
protected function getFirstMatch()
{
    $set = $this->matches;
    $set->rewind();

    return $set->current();
}
/**
 * Parse an XML or HTML file.
 *
 * This attempts to autodetect the type of file, and then parse it: the
 * 'use_parser' option wins; otherwise files ending in .html or .htm are
 * parsed as HTML and everything else as XML.
 *
 * @param string $filename
 *   The file name to parse.
 * @param int|null $flags
 *   The OR-combined flags accepted by the DOM parser. See the PHP documentation
 *   for DOM or for libxml. Null is treated as 0.
 * @param resource $context
 *   The stream context for the file IO. If this is set, then an alternate
 *   parsing path is followed: The file is loaded by PHP's stream-aware IO
 *   facilities, read entirely into memory, and then handed off to
 *   {@link parseXMLString()}. On large files, this can have a performance impact.
 *
 * @return DOMDocument
 *   The parsed document.
 * @throws ParseException
 *   Thrown when a file cannot be loaded or parsed.
 */
private function parseXMLFile($filename, $flags = 0, $context = null)
{
    // Backwards compatibility fix for PHP8+
    if (is_null($flags)) {
        $flags = 0;
    }

    $errorHandler = [ParseException::class, 'initializeFromError'];

    // If a context is specified, we basically have to do the reading in
    // two steps: fetch the contents, then parse them as a string.
    if (! empty($context)) {
        set_error_handler($errorHandler, $this->errTypes);
        try {
            $contents = file_get_contents($filename, false, $context);
        } finally {
            // Always remove our handler, whatever kind of throwable escaped.
            restore_error_handler();
        }

        // Loose comparison on purpose: besides a failed read (false), empty
        // contents are rejected too, as there is nothing to parse.
        if ($contents == false) {
            throw new ParseException(sprintf(
                'Contents of the file %s could not be retrieved.',
                $filename
            ));
        }

        return $this->parseXMLString($contents, $flags);
    }

    $document = new DOMDocument();
    $htmlExtensions = [
        '.html' => 1,
        '.htm' => 1,
    ];

    // Allow users to override parser settings.
    $useParser = empty($this->options['use_parser'])
        ? ''
        : strtolower($this->options['use_parser']);

    $lastDot = strrpos($filename, '.');
    $ext = $lastDot !== false ? strtolower(substr($filename, $lastDot)) : '';

    // An explicit 'xml' parser always wins; otherwise HTML is used when it
    // is requested or when the extension looks like HTML. XML is the default.
    $parseAsHtml = $useParser !== 'xml'
        && ($useParser === 'html' || isset($htmlExtensions[$ext]));

    set_error_handler($errorHandler, $this->errTypes);
    try {
        $loaded = $parseAsHtml
            ? $document->loadHTMLFile($filename)
            : $document->load($filename, $flags);
    } finally {
        restore_error_handler();
    }

    // The loaders return false on failure. That is only reachable when the
    // failure was not already converted into an exception above.
    if ($loaded === false) {
        throw new ParseException(sprintf(
            'The file %s could not be loaded or parsed.',
            $filename
        ));
    }

    return $document;
}
/**
 * Determine whether a given string looks like XML or not.
 *
 * The test is deliberately simple: the string must contain both a '<' and
 * a '>' somewhere. It is possible to "confuse" this, which may subsequently
 * result in parse errors, but in the vast majority of cases it is a valid
 * indicator of whether or not the content looks like markup.
 *
 * Things that are intentionally excluded:
 * - plain text with no markup.
 * - strings that look like filesystem paths.
 *
 * Subclasses SHOULD NOT OVERRIDE THIS. Altering it may be altering
 * core assumptions about how things work. Instead, classes should
 * override the constructor and pass in only one of the parsed types
 * that this class expects.
 *
 * @param string $string
 *   The string to inspect.
 *
 * @return bool
 *   True when both angle brackets are present.
 */
protected function isXMLish($string)
{
    if (strpos($string, '<') === false) {
        return false;
    }

    return strpos($string, '>') !== false;
}
/**
 * A utility function for retrieving a match by index.
 *
 * The internal data structure used in DOMQuery does not have
 * strong random access support, so we supplement it with this method.
 *
 * @param int $index
 *   Zero-based position of the wanted match.
 *
 * @return object|null
 *   The match at that position, or null when the index is out of range.
 */
protected function getNthMatch(int $index)
{
    // Valid positions are 0 .. count - 1.
    if ($index < 0 || $index >= $this->matches->count()) {
        return null;
    }

    $position = 0;
    foreach ($this->matches as $match) {
        if ($position === $index) {
            return $match;
        }
        ++$position;
    }

    return null;
}
}