1 common.inc | _filter_xss_attributes($attributes) |
Processes a string of HTML attributes.
Parameters
string $attributes: A space-separated list of attributes, such as 'class="foo bar" foo="bar"'.
Return value
array: Cleaned up version of the HTML attributes as an array.
Related topics
File
- core/includes/common.inc, line 2065: Common functions that many Backdrop modules will need to reference.
Code
/**
 * Processes a string of HTML attributes.
 *
 * The string is consumed from the left by a three-state parser:
 * - mode 0 expects an attribute name,
 * - mode 1 expects "=" or the whitespace that ends a valueless attribute,
 * - mode 2 expects a double-quoted, single-quoted or unquoted value.
 * Whenever the current state cannot match, the leading malformed chunk is
 * removed and parsing restarts in mode 0.
 *
 * Attributes named "style", starting with "on" or "-", or whose name is longer
 * than 96 characters are parsed but left out of the result.
 *
 * @param string $attributes
 *   A space-separated list of attributes, such as 'class="foo bar" foo="bar"'.
 *
 * @return array
 *   The retained attributes, keyed by lower-cased attribute name. Valueless
 *   attributes (such as "selected") use their own name as the value. Values
 *   are passed through filter_xss_bad_protocol() unless the attribute is a
 *   "data-*" attribute or one of the non-URI attributes listed below, in which
 *   case the value is returned exactly as matched.
 *
 * @see filter_xss_bad_protocol()
 */
function _filter_xss_attributes($attributes) {
  // Normalize NULL/FALSE input so the string functions below are always
  // handed a string.
  $attributes = (string) $attributes;
  $attributes_array = array();
  $mode = 0;
  $attribute_name = '';
  $skip = FALSE;
  $skip_protocol_filtering = FALSE;

  while (strlen($attributes) != 0) {
    // Was the last operation successful?
    $working = 0;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance. The captured name is exactly the
        // text that gets consumed, so names containing digits (for example
        // "data-id1") are kept intact.
        if (preg_match('/^([-a-zA-Z][-a-zA-Z0-9]*)/', $attributes, $match)) {
          $attribute_name = strtolower($match[1]);
          $skip = (
            $attribute_name == 'style' ||
            substr($attribute_name, 0, 2) == 'on' ||
            substr($attribute_name, 0, 1) == '-' ||
            // Ignore long attributes to avoid unnecessary processing overhead.
            strlen($attribute_name) > 96
          );

          // Values for attributes of type URI should be filtered for
          // potentially malicious protocols (for example, an href-attribute
          // starting with "javascript:"). However, for some non-URI
          // attributes performing this filtering causes valid and safe data
          // to be mangled. We prevent this by skipping protocol filtering on
          // such attributes.
          // @see backdrop_strip_dangerous_protocols()
          // @see http://www.w3.org/TR/html4/index/attributes.html
          $skip_protocol_filtering = (strpos($attribute_name, 'data-') === 0) || in_array($attribute_name, array(
            'title',
            'alt',
            'rel',
            'property',
            'class',
            'datetime',
          ), TRUE);

          $working = $mode = 1;
          // Every pattern in this switch is anchored with "^" and cannot
          // match an empty string, so dropping the matched prefix by length
          // is the same as replacing the pattern with ''. The cast covers
          // PHP versions where substr() returns FALSE at the end of the
          // string.
          $attributes = (string) substr($attributes, strlen($match[0]));
        }
        break;

      case 1:
        // Equals sign or valueless ("selected").
        if (preg_match('/^\s*=\s*/', $attributes, $match)) {
          $mode = 2;
          $working = 1;
          $attributes = (string) substr($attributes, strlen($match[0]));
        }
        elseif (preg_match('/^\s+/', $attributes, $match)) {
          $mode = 0;
          $working = 1;
          if (!$skip) {
            $attributes_array[$attribute_name] = $attribute_name;
          }
          $attributes = (string) substr($attributes, strlen($match[0]));
        }
        break;

      case 2:
        // Attribute value, a URL after href= for instance. The three quoting
        // styles are tried in order: "double", 'single', then unquoted. Each
        // must be followed by whitespace or the end of the string. Because of
        // short-circuit evaluation $match holds the result of the first
        // pattern that succeeded.
        if (preg_match('/^"([^"]*)"(\s+|$)/', $attributes, $match)
          || preg_match("/^'([^']*)'(\s+|$)/", $attributes, $match)
          || preg_match("%^([^\s\"']+)(\s+|$)%", $attributes, $match)) {
          // The value of a skipped attribute is discarded, so it is not
          // filtered at all.
          if (!$skip) {
            // Prevent filtering for some attribute values, where performing
            // the filtering would cause valid and safe data to be mangled.
            // This applies regardless of how the value was quoted. Bad
            // protocols are stripped from all other attributes, e.g. href,
            // src.
            $attributes_array[$attribute_name] = $skip_protocol_filtering ? $match[1] : filter_xss_bad_protocol($match[1]);
          }
          $mode = 0;
          $working = 1;
          $attributes = (string) substr($attributes, strlen($match[0]));
        }
        break;
    }

    if ($working == 0) {
      // Not well-formed; remove and try again.
      $attributes = preg_replace('/
        ^
        (
        "[^"]*("|$) # - a string that starts with a double quote, up until the next double quote or the end of the string
        | # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        | # or
        \S # - a non-whitespace character
        )* # any number of the above three
        \s* # any number of whitespaces
        /x', '', $attributes);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected". Key
  // it by name, exactly like a valueless attribute in the middle of the list.
  if ($mode == 1 && !$skip) {
    $attributes_array[$attribute_name] = $attribute_name;
  }
  return $attributes_array;
}