Sanitizing Links
Script to sanitize links in a string. This is useful when formatting text that contains links from untrusted sources.
Requirements:
- composer
- php-xml
- php-mbstring
- ezyang/htmlpurifier
Installation:
composer require ezyang/htmlpurifier
Code:
<?php
/**
 * Rewrite angle-bracket autolinks such as `<http://example.com/>` into
 * parenthesised plain text such as `(http://example.com)`.
 *
 * The scheme is matched case-insensitively and always emitted in lower case.
 * Each autolink is handled on its own, so several of them on one line (or an
 * autolink followed by an unrelated self-closing tag) are no longer merged
 * into a single replacement.
 *
 * @param string $text Text that may contain `<http://…/>` / `<https://…/>` autolinks.
 * @return string The text with every such autolink rewritten.
 */
function fix_special_links($text='') {
    // Put a space between a "/>" terminator and a directly following line
    // break, and normalise that break to "<br>".
    $fixed = str_replace(array("/><br />", "/><br>"), "/> <br>", $text);

    // The capture is lazy and may not cross "<", ">" or a newline, so it stops
    // at the first "/>" that closes *this* autolink instead of the last "/>"
    // found on the line.
    $patterns = array(
        '#<http://([^<>\n]*?)(?:/>)+#i',
        '#<https://([^<>\n]*?)(?:/>)+#i',
    );
    $replacements = array(
        '(http://$1)',
        '(https://$1)',
    );
    $result = preg_replace($patterns, $replacements, $fixed);

    // preg_replace() yields null on a PCRE failure; keep the text rather than lose it.
    return $result === null ? $fixed : $result;
}
/**
 * Purify an HTML fragment and replace every `<a>` element by its text content.
 *
 * Steps:
 *  1. angle-bracket autolinks are rewritten by fix_special_links();
 *  2. the markup is run through HTML Purifier with its default configuration;
 *  3. the result is parsed with DOMDocument and each `<a>` node is swapped for
 *     a text node holding the link's text;
 *  4. if the DOM route fails, a regular expression strips the anchors instead.
 *
 * @param string $text Untrusted HTML / text.
 * @return string Purified markup without `<a>` elements; line breaks that sit
 *                directly between two tags are removed.
 */
function sanitize_links($text='') {
    // Plain function call: these helpers are free functions, so there is no
    // object context and `$this` cannot be used here.
    $text = fix_special_links($text);

    $config = \HTMLPurifier_Config::createDefault();
    $purifier = new \HTMLPurifier($config);
    $text = $purifier->purify($text);

    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8),
    // and there is nothing left to unlink anyway.
    if ($text === '') {
        return '';
    }

    $return = null;
    // Remember the caller's libxml error mode so it can be restored afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        libxml_clear_errors();
        $doc = new DOMDocument('1.0', 'UTF-8');
        $doc->preserveWhiteSpace = false;

        // Encode every non-ASCII character as a numeric entity so the parser
        // reads the UTF-8 input correctly whatever charset it assumes. This
        // replaces the deprecated 'HTML-ENTITIES' pseudo-encoding.
        $encoded = mb_encode_numericentity($text, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

        if ($doc->loadHTML($encoded) && $doc->documentElement !== null) {
            $links = $doc->getElementsByTagName('a');
            // The node list is live: walk it backwards so that replacing a
            // node does not shift the items still to be visited.
            for ($i = $links->length - 1; $i >= 0; $i--) {
                $linkNode = $links->item($i);
                $newTxtNode = $doc->createTextNode($linkNode->textContent);
                $linkNode->parentNode->replaceChild($newTxtNode, $linkNode);
            }

            $html = $doc->saveHTML($doc->documentElement);
            if (is_string($html)) {
                // Drop the <html>/<body> wrapper tags from the serialised document.
                $return = str_replace(array('<html>', '</html>', '<body>', '</body>'), '', $html);
            }
        }
    } catch (Exception $e) {
        // Fall through to the regular-expression fallback below.
        $return = null;
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if ($return === null) {
        // Fallback: strip anchors with a regular expression. `\b` keeps tags
        // that merely start with "a" (e.g. <abbr>) from being treated as links.
        $return = preg_replace('#<a\b[^>]*>([^>]*)</a>#i', '$1', $text);
    }

    // Remove a lone line break sitting directly between two tags.
    return str_replace(array(">\n<", ">\r<"), "><", $return);
}
/**
 * Convert a Windows-1251 encoded string to UTF-8, turn typographic quotes and
 * prime marks into plain ASCII quotes, and strip non-printable characters.
 *
 * NOTE(review): the final pattern has no `u` modifier, so it works on bytes;
 * with PCRE's default character tables every byte >= 0x80 counts as
 * non-printable and the result ends up ASCII-only. Confirm that dropping all
 * remaining non-ASCII text is intended.
 *
 * @param string $string Input bytes, interpreted as Windows-1251.
 * @return string The cleaned string; an empty string if the conversion fails.
 */
function sanitize_string($string='') {
    // 0x98 is the one byte Windows-1251 leaves unassigned. iconv() rejects it
    // and returns false, which used to wipe the entire string, so drop it first.
    $string = str_replace("\x98", '', $string);

    $string = iconv("Windows-1251", "UTF-8", $string);
    if ($string === false) {
        // The conversion still failed: say so explicitly instead of letting
        // `false` flow into the regular expressions below.
        return '';
    }

    // Single-quote look-alikes -> apostrophe.
    $string = preg_replace('/[\x{2018}\x{2019}\x{201A}\x{201B}\x{2032}\x{2035}]/u', "'", $string);
    // Double-quote look-alikes -> straight double quote.
    $string = preg_replace('/[\x{201C}\x{201D}\x{201E}\x{201F}\x{2033}\x{2036}]/u', '"', $string);
    // Remove everything that is not a printable character (this includes
    // control characters such as tabs and line breaks).
    $string = preg_replace('/[[:^print:]]/', "", $string);
    return $string;
}
Usage:
// Sample input: a sentence containing one HTML anchor.
$string = "String with links <a href='https://randolphledesma.com'>https://randolphledesma.com</a>";
// Pass the sample through sanitize_links() and print whatever it returns.
echo sanitize_links($string);