File content/formatting/parser/ContentFormattingParser.class.php
1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285: 286: 287: 288: 289: 290: 291: 292: 293: 294: 295: 296: 297: 298: 299: 300: 301:
<?php
/**
* This class is abstract. It contains tools that are useful for implementing a content parser.
* @package Content
* @subpackage Formatting\parser
* @copyright © 2005-2019 PHPBoost
* @license https://www.gnu.org/licenses/gpl-3.0.html GNU/GPL-3.0
* @author Benoit SAUTEL <ben.popeye@phpboost.com>
* @version PHPBoost 5.2 - last update: 2018 03 29
* @since PHPBoost 2.0 - 2008 08 10
* @contributor Julien BRISWALTER <j1.seth@phpboost.com>
* @contributor Arnaud GENET <elenwii@phpboost.com>
* @contributor mipel <mipel@phpboost.com>
*/
abstract class ContentFormattingParser extends AbstractParser
{
	/**
	 * @var mixed[] Authorization array of the HTML BBCode tag.
	 */
	protected $html_auth = array();

	/**
	 * @var string[] List of the BBCode forbidden tags
	 */
	protected $forbidden_tags = array();

	/**
	 * Builds a ContentFormattingParser object.
	 * The forbidden tags and the HTML tag authorizations are initialized from the content formatting configuration.
	 */
	public function __construct()
	{
		parent::__construct();
		$content_formatting_config = ContentFormattingConfig::load();
		$this->forbidden_tags = $content_formatting_config->get_forbidden_tags();
		$this->html_auth = $content_formatting_config->get_html_tag_auth();
	}

	/**
	 * Parses the content of the parser.
	 * At this level it only passes the content through Url::html_convert_absolute2root_relative.
	 * @return void You will find the result by using the get_content method
	 */
	public function parse()
	{
		$this->content = Url::html_convert_absolute2root_relative($this->content, $this->path_to_root, $this->page_path);
	}

	/**
	 * Sets the tags which mustn't be parsed.
	 * @param string[] $forbidden_tags list of the name of the tags which mustn't be parsed.
	 */
	public function set_forbidden_tags(array $forbidden_tags)
	{
		// The array type declaration already guarantees the type, no runtime check is needed
		$this->forbidden_tags = $forbidden_tags;
	}

	/**
	 * Gets the forbidden tags.
	 * @return string[] List of the forbidden tags
	 */
	public function get_forbidden_tags()
	{
		return $this->forbidden_tags;
	}

	/**
	 * Sets the required authorizations that are necessary to post some HTML code which
	 * will be displayed by the web browser.
	 * @param mixed[] $array_auth authorization array
	 */
	public function set_html_auth(array $array_auth)
	{
		$this->html_auth = $array_auth;
	}

	/**
	 * Returns the HTML tag auth
	 * @return mixed[]
	 */
	public function get_html_auth()
	{
		return $this->html_auth;
	}

	/**
	 * Splits a string according to a tag name.
	 * Also works with nested tags.
	 * @param string $content Content to split. It is modified by reference and replaced by an array
	 * whose elements follow this repeating pattern:
	 * <ul>
	 * 	<li>The content between two tags (or at the beginning or the end of the content)</li>
	 * 	<li>The parameter of the tag</li>
	 * 	<li>The content of the tag. If it contains a nested tag, it is itself split according to the same pattern.</li>
	 * </ul>
	 * @param string $tag Tag name (literal, not a regular expression)
	 * @param string $attributes Regular expression of the attribute form
	 */
	protected function split_imbricated_tag(&$content, $tag, $attributes)
	{
		$content = self::preg_split_safe_recurse($content, $tag, $attributes);

		// Detects a complete nested [tag]...[/tag] pair inside a tag content
		$quoted_tag = preg_quote($tag, '`');
		$nested_tag_mask = '`\[' . $quoted_tag . '(?:' . $attributes . ')?\].+\[/' . $quoted_tag . '\]`su';

		// Elements go by three: text between tags, tag attributes, tag content
		$nbr_occur = count($content);
		for ($i = 0; $i < $nbr_occur; $i++)
		{
			// Only the tag contents (third element of each triplet) can hold a nested tag to split again
			if (($i % 3) === 2 && preg_match($nested_tag_mask, $content[$i]))
			{
				$this->split_imbricated_tag($content[$i], $tag, $attributes);
			}
		}
	}

	/**
	 * Splits a string according to a regular expression. The matched pattern can be nested and must follow the BBCode syntax,
	 * i.e. matching [tag=args]content of the tag[/tag].
	 * Only the top-level tags are split: a nested tag is left untouched inside the content of its parent
	 * (use split_imbricated_tag to split recursively).
	 * For example, if you have this: $my_str = '[tag=1]test1[/tag]test2[tag=2]test3[tag=3]test4[/tag]test5[/tag]test6';
	 * You call it like that: ContentFormattingParser::preg_split_safe_recurse($my_str, 'tag', '=[0-9]');
	 * It will return array('', '=1', 'test1', 'test2', '=2', 'test3[tag=3]test4[/tag]test5', 'test6').
	 * @param $content string Content into which you want to search the pattern
	 * @param $tag string BBCode tag name (literal, not a regular expression)
	 * @param $attributes string The regular expression (PCRE syntax) corresponding to the arguments which you want to match.
	 * There mustn't be any capturing parenthesis in that regular expression
	 * @return string[] the split string: text, tag attributes and tag content, repeated, ending with the trailing text
	 */
	protected static function preg_split_safe_recurse($content, $tag, $attributes)
	{
		// Start positions of the valid top-level opening tags
		$index_tags = self::index_tags($content, $tag, $attributes);
		$size = count($index_tags);

		// No opening tag: the whole content is a single text element
		if ($size === 0)
		{
			return array($content);
		}

		// The mask does not depend on the iteration, build it once
		$quoted_tag = preg_quote($tag, '`');
		$mask = '`\[' . $quoted_tag . '(' . $attributes . ')?\](.*)\[/' . $quoted_tag . '\](.+)?`su';

		// Text located before the first opening tag
		$parsed = array(TextHelper::substr($content, 0, $index_tags[0]));

		for ($i = 0; $i < $size; $i++)
		{
			$current_index = $index_tags[$i];
			$is_last_tag = ($i === $size - 1);

			// The chunk runs from the current opening tag to the next one (or to the end of the content)
			if ($is_last_tag)
			{
				$sub_str = TextHelper::substr($content, $current_index);
			}
			else
			{
				$sub_str = TextHelper::substr($content, $current_index, $index_tags[$i + 1] - $current_index);
			}

			// On a match the result is: text before, attributes, tag content, text after the closing tag
			$local_parsed = preg_split($mask, $sub_str, -1, PREG_SPLIT_DELIM_CAPTURE);

			// Not a complete tag (or the regular expression failed): the chunk is plain text.
			// It is appended to the previous text element and nothing else is pushed, which keeps the
			// (text, attributes, content) layout intact and avoids reading undefined offsets.
			if ($local_parsed === false || count($local_parsed) === 1)
			{
				$parsed[count($parsed) - 1] .= $sub_str;
				continue;
			}

			array_push($parsed, $local_parsed[1]); // tag attributes
			array_push($parsed, $local_parsed[2]); // tag content

			// Text located after the closing tag
			if (!$is_last_tag)
			{
				// Keep what lies between the current closing tag and the next opening tag
				$current_tag_len = TextHelper::strlen('[' . $tag . $local_parsed[1] . ']' . $local_parsed[2] . '[/' . $tag . ']');
				$end_pos = $index_tags[$i + 1] - ($current_index + $current_tag_len);
				array_push($parsed, TextHelper::substr($local_parsed[3], 0, $end_pos));
			}
			elseif (isset($local_parsed[3]))
			{
				// Last tag: everything that follows it is the trailing text
				array_push($parsed, $local_parsed[3]);
			}
		}

		return $parsed;
	}

	/**
	 * @static
	 * Indexes the positions of the top-level opening tags in the document.
	 * An opening tag is kept only when as many closing tags as "[tag" occurrences have been met before it
	 * and when it matches the expected attributes syntax.
	 * @param $content string Content into which index the positions.
	 * @param $tag string tag name
	 * @param $attributes string The regular expression matching the parameters of the tag (see the preg_split_safe_recurse method).
	 * @return int[] The positions of the opening tags.
	 */
	private static function index_tags($content, $tag, $attributes)
	{
		$opening_prefix = '[' . $tag;
		$closing_tag = '[/' . $tag . ']';
		$prefix_length = TextHelper::strlen($opening_prefix);
		$opening_tag_mask = '`\[' . preg_quote($tag, '`') . '(' . $attributes . ')?\]`u';

		$pos = -1;
		$nb_open_tags = 0;
		$tag_pos = array();

		// NOTE(review): strpos returns byte offsets whereas the positions are consumed by TextHelper::substr.
		// If TextHelper works on characters (multibyte), the offsets disagree as soon as a multibyte
		// character precedes a tag - to be confirmed against TextHelper.
		while (($pos = strpos($content, $opening_prefix, $pos + 1)) !== false)
		{
			// Number of closing tags met before this position
			$nb_close_tags = TextHelper::substr_count(TextHelper::substr($content, 0, $pos + $prefix_length), $closing_tag);

			// The position is recorded only when every "[tag" seen so far has been closed
			if ($nb_open_tags == $nb_close_tags)
			{
				$closing_bracket_pos = strpos($content, ']', $pos + 1);
				if ($closing_bracket_pos === false)
				{
					// No "]" left in the content: neither this tag nor any following one can be complete
					break;
				}

				// Text of the candidate opening tag, from "[" to the first "]"
				$open_tag = TextHelper::substr($content, $pos, $closing_bracket_pos + 1 - $pos);
				if (preg_match($opening_tag_mask, $open_tag) == 1)
				{
					$tag_pos[] = $pos;
				}
			}

			// Every "[tag" occurrence is counted, whether or not it has been recorded
			$nb_open_tags++;
		}

		return $tag_pos;
	}

	/**
	 * Removes the tags $tag (with their content) and replaces them by an identifying code. They will be reinserted in the content by the reimplant_tag method.
	 * It enables you to treat the whole string without affecting the interior of some tags.
	 * Example: $my_parser contains this content: 'test1[code]test2[/code]test3'
	 * $my_parser->pick_up_tag('code'); will replace the content of the parser by 'test1[CODE_TAG_0]test3'
	 * and store '[code]test2[/code]' for the later reinsertion.
	 * In the stored content of the tag, each '<br />' is replaced by a line break.
	 * @param $tag string The tag to isolate
	 * @param $arguments string The regular expression matching the arguments syntax.
	 */
	protected function pick_up_tag($tag, $arguments = '')
	{
		// Split on the top-level tags only: nested tags stay in the content of their parent,
		// which allows for instance [code][code]some code[/code][/code]
		$split_code = $this->preg_split_safe_recurse($this->content, $tag, $arguments);
		$num_codes = count($split_code);

		// A single element means that the tag does not appear in the content
		if ($num_codes <= 1)
		{
			return;
		}

		$placeholder_prefix = '[' . TextHelper::strtoupper($tag) . '_TAG_';
		$this->content = '';
		// NOTE(review): the numbering restarts at 0 on each call while the stored tags are appended;
		// this presumably expects reimplant_tag to be called between two calls for the same tag.
		$id_code = 0;

		for ($i = 0; $i < $num_codes; $i++)
		{
			if ($i % 3 == 0)
			{
				// Text between the tags
				$this->content .= $split_code[$i];
				// Unless this is the trailing text, a tag follows: mark its position
				if ($i < $num_codes - 1)
				{
					$this->content .= $placeholder_prefix . $id_code++ . ']';
				}
			}
			elseif ($i % 3 == 2)
			{
				// Tag content: store the whole tag (the attributes are the previous element) to reinsert it later
				$this->array_tags[$tag][] = '[' . $tag . $split_code[$i - 1] . ']' . str_replace('<br />', "\n", $split_code[$i]) . '[/' . $tag . ']';
			}
		}
	}

	/**
	 * Reimplants the tags which have been picked up by the pick_up_tag method.
	 * @param $tag string tag to reimplant.
	 * @return bool True if the reimplantation succeeded, false if nothing is registered for this tag.
	 */
	protected function reimplant_tag($tag)
	{
		// Nothing to do if this tag has never been picked up
		if (!array_key_exists($tag, $this->array_tags))
		{
			return false;
		}

		// Put every stored tag back in place of its identifying code
		$placeholder_prefix = '[' . TextHelper::strtoupper($tag) . '_TAG_';
		$num_code = count($this->array_tags[$tag]);
		for ($i = 0; $i < $num_code; $i++)
		{
			$this->content = str_replace($placeholder_prefix . $i . ']', $this->array_tags[$tag][$i], $this->content);
		}

		// Forget what has been picked up for this tag
		$this->array_tags[$tag] = array();

		return true;
	}

	/**
	 * Converts the [feed attr="value" ...]name[/feed] BBCode tags of the content into
	 * [[FEED attr="value" ...]]name[[/FEED]] markers.
	 * The '[[FEED' and '[[/FEED]]' sequences already present in the content are escaped with backslashes
	 * before the conversion and unescaped right after it.
	 * @return void The content of the parser is updated in place
	 */
	protected function parse_feed_tag()
	{
		$this->content = str_replace(array('[[FEED', '[[/FEED]]'), array('\[\[FEED', '\[\[/FEED\]\]'), $this->content);
		$this->content = preg_replace('`\[feed((?: [a-z]+="[^"]+")*)\]([a-z]+)\[/feed\]`uU', '[[FEED$1]]$2[[/FEED]]', $this->content);
		$this->content = str_replace(array('\[\[FEED', '\[\[/FEED\]\]'), array('[[FEED', '[[/FEED]]'), $this->content);
	}
}
?>