<?php

/**
 * @copyright Copyright © 2007 Paul Chaplin. All Rights Reserved.
 */

/**
 * Re-indents an (X)HTML/XML document using XMLReader.
 *
 * The source is streamed node by node and re-emitted with one element per
 * line, except that:
 *  - elements listed in $inlineElements are never placed on a new line;
 *  - elements listed in $shrinkwrappedElements or $preserveWhitespaceElements
 *    get a new line for their opening tag only, so their closing tag stays
 *    attached to their content.
 *
 * Only element, text, CDATA and comment nodes are written to the output; every
 * other node type (whitespace-only text, processing instructions, the source's
 * own DOCTYPE, ...) is discarded.
 */
class XMLFormatter
{
	/** @var XMLReader|null Reader for the document being formatted; only set while format() runs. */
	protected $reader;
	/** @var string|null Source XML from loadXML(). Borrowed as the output buffer while format() runs. */
	protected $xml;
	/** @var string|null Encoding handed to XMLReader and written into the XML declaration. */
	protected $encoding;
	/** @var int|null libxml option flags handed to XMLReader (null is treated as 0). */
	protected $options;
	// Elements that are emitted in the flow of the surrounding text, i.e. never on a new line.
	protected $inlineElements = array(
		'a',
		'abbr',
		'acronym',
		'b',
		'bdo',
		'big',
		'br',
		'cite',
		'code',
		'del',
		'dfn',
		'em',
		'i',
		'img',
		'ins',
		'kbd',
		'noscript',
		//'object', // ???
		//'param', // ???
		'q',
		'samp',
		'span',
		'small',
		'strong',
		'sub',
		'sup',
		'tt',
	);
	// Elements to have their contents shrinkwrapped, e.g. <p>This is a <strong>paragraph!</strong></p>, and thus only the first tag will be indented.
	protected $shrinkwrappedElements = array(
		'address', // ???
		'li',
		'p',
		'script',
		'title',
		'h1',
		'h2',
		'h3',
		'h4',
		'h5',
		'h6',
	);
	// Elements whose closing tag must not be moved to a new line, because doing so would add whitespace to their content; only the opening tag is indented.
	protected $preserveWhitespaceElements = array(
		'pre',
	);
	// DOCTYPE declarations keyed by the name accepted by format(). Empty strings are placeholders: they are accepted as known names but add nothing to the output.
	protected $doctypes = array(
		'XHTML 1.1' => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">',
		'XHTML 1.0 Strict' => '',
		'HTML 4.01 Strict' => '',
	);
	// String to indent elements with
	protected $indentString;
	
	/**
	 * @param string|null $xml      Source document; when empty nothing is loaded.
	 * @param string      $encoding Encoding of the source document.
	 * @param int|null    $options  libxml option flags for XMLReader.
	 */
	public function __construct($xml = null, $encoding = 'utf-8', $options = null)
	{
		if ( $xml )
		{
			$this->loadXML($xml, $encoding, $options);
		}
	}
	
	/**
	 * Stores the document to be formatted; nothing is parsed until format() is called.
	 *
	 * @param string   $xml      Source document.
	 * @param string   $encoding Encoding of the source document.
	 * @param int|null $options  libxml option flags for XMLReader.
	 * @return bool Always true.
	 */
	public function loadXML($xml, $encoding = 'utf-8', $options = null)
	{
		$this->xml = $xml;
		$this->encoding = $encoding;
		$this->options = $options;
		return true;
	}
	
	/**
	 * Parses the loaded document and returns it re-indented.
	 *
	 * The loaded source is left untouched, so format() may be called repeatedly
	 * (for instance with different indent strings).
	 *
	 * @param string      $indentString   String repeated once per indentation level.
	 * @param bool        $xmlDeclaration Whether to prepend an XML declaration.
	 * @param string|null $doctype        Key of $doctypes to prepend; unknown or empty names add no DOCTYPE.
	 * @return string The formatted document.
	 */
	public function format($indentString = "\t", $xmlDeclaration = true, $doctype = 'XHTML 1.1')
	{
		$source = $this->xml;
		$this->indentString = $indentString;
		
		$this->reader = new XMLReader();
		// The flags argument is an integer; a null "no options" value is passed as 0.
		$this->reader->XML($source, $this->encoding, (int) $this->options);
		
		// processNode() appends to $this->xml, so borrow it as the output buffer
		// and put the source back afterwards to keep format() repeatable.
		$this->xml = '';
		while ( $this->reader->read() )
		{
			$this->processNode();
		}
		$body = $this->xml;
		$this->xml = $source;
		
		// Clean up for the sake of RAM.
		$this->reader->close();
		$this->reader = null;
		
		$hasDoctype = $doctype && isset($this->doctypes[$doctype]);
		$declaration = '<?xml version="1.0" encoding="' . $this->encoding . '"?>';
		
		// $body normally starts with the newline emitted in front of the root
		// element, which is what separates it from the prolog below.
		if ( $xmlDeclaration && $hasDoctype )
		{
			return $declaration . "\n" . $this->doctypes[$doctype] . $body;
		}
		if ( $xmlDeclaration )
		{
			return $declaration . $body;
		}
		if ( $hasDoctype )
		{
			return $this->doctypes[$doctype] . $body;
		}
		
		// No prolog: strip the leading newline, but only if there is one. A
		// document whose root is an inline element starts directly with its tag.
		if ( $body !== '' && $body[0] === "\n" )
		{
			return (string) substr($body, 1);
		}
		return $body;
	}
	
	/**
	 * Returns a newline followed by the indentation for the reader's current node.
	 *
	 * Depths 0 and 1 both get no indentation; deeper nodes get (depth - 1) repetitions.
	 *
	 * @param string $indentString String repeated once per indentation level.
	 * @return string
	 */
	protected function indent($indentString)
	{
		return "\n" . str_repeat($indentString, max(0, $this->reader->depth - 1));
	}
	
	/**
	 * Appends the markup for the reader's current node to $this->xml.
	 *
	 * Node types not handled here are dropped from the output.
	 */
	protected function processNode()
	{
		switch ( $this->reader->nodeType )
		{
			// Start (tag of an) element
			case XMLReader::ELEMENT:
				$name = $this->reader->name;
				
				// Indentable?
				if ( !in_array($name, $this->inlineElements, true) )
				{
					$this->xml .= $this->indent($this->indentString);
				}
				$this->xml .= '<' . $name;
				
				if ( $this->reader->moveToFirstAttribute() )
				{
					// Collect name => value pairs for every attribute of the element
					$attributes = array();
					do
					{
						$attributes[$this->reader->name] = $this->reader->value;
					}
					while ( $this->reader->moveToNextAttribute() );
					
					// Emit the attributes in reverse alphabetical order of their names
					krsort($attributes);
					foreach ( $attributes as $k => $v )
					{
						// XMLReader hands values back as UTF-8 regardless of the source encoding
						$this->xml .= ' ' . $k . '="' . htmlspecialchars($v, ENT_COMPAT, 'UTF-8') . '"';
					}
					
					// The cursor is on an attribute now; return to the owning
					// element, otherwise isEmptyElement below describes the attribute.
					$this->reader->moveToElement();
				}
				
				$this->xml .= $this->reader->isEmptyElement ? '/>' : '>';
				break;
			// Text node
			case XMLReader::TEXT:
				$this->xml .= htmlspecialchars($this->reader->value, ENT_NOQUOTES, 'UTF-8');
				break;
			// CDATA section: emitted verbatim so that its content is not lost
			case XMLReader::CDATA:
				$this->xml .= '<![CDATA[' . $this->reader->value . ']]>';
				break;
			// A comment node
			case XMLReader::COMMENT:
				$this->xml .= $this->indent($this->indentString) . '<!--' . $this->reader->value . '-->';
				break;
			// End (tag of an) element
			case XMLReader::END_ELEMENT:
				$name = $this->reader->name;
				
				// Indentable? Inline, shrinkwrapped and whitespace-preserving
				// elements keep their closing tag attached to their content.
				if ( !in_array($name, $this->inlineElements, true)
					&& !in_array($name, $this->preserveWhitespaceElements, true)
					&& !in_array($name, $this->shrinkwrappedElements, true)
				)
				{
					$this->xml .= $this->indent($this->indentString);
				}
				$this->xml .= '</' . $name . '>';
				break;
		}
	}
}

?>