1 common.inc _filter_xss_attributes($attributes)

Processes a string of HTML attributes.

Parameters

string $attributes: A space-separated list of attributes, such as 'class="foo bar" foo="bar"'.

Return value

array: Cleaned up version of the HTML attributes as an array.

Related topics

File

core/includes/common.inc, line 2065
Common functions that many Backdrop modules will need to reference.

Code

/**
 * Processes a string of HTML attributes.
 *
 * The string is consumed from the left by a three-state parser:
 * - state 0 expects an attribute name,
 * - state 1 expects either "=" or whitespace (a valueless attribute),
 * - state 2 expects a double-quoted, single-quoted or unquoted value.
 * Whenever the current state cannot consume anything, a chunk of malformed
 * input is discarded and parsing restarts in state 0.
 *
 * Attributes named "style", attributes whose name starts with "on" or "-",
 * and attributes whose name is longer than 96 characters are parsed but never
 * added to the result.
 *
 * @param string $attributes
 *   A space-separated list of attributes, such as 'class="foo bar" foo="bar"'.
 *
 * @return array
 *   The retained attributes, keyed by lowercased attribute name. The value is
 *   the attribute value, passed through filter_xss_bad_protocol() unless the
 *   attribute is exempt from protocol filtering. A valueless attribute such as
 *   "selected" uses its own name as its value.
 */
function _filter_xss_attributes($attributes) {
  $attributes_array = array();
  $mode = 0;
  $attribute_name = '';
  $skip = FALSE;
  $skip_protocol_filtering = FALSE;

  // Attributes whose values are not URIs; running them through the protocol
  // filter would mangle valid and safe data.
  // @see backdrop_strip_dangerous_protocols()
  // @see http://www.w3.org/TR/html4/index/attributes.html
  $non_uri_attributes = array(
    'title',
    'alt',
    'rel',
    'property',
    'class',
    'datetime',
  );

  // Accepted value syntaxes, tried in this order: double-quoted,
  // single-quoted, unquoted. Each must be followed by whitespace or the end
  // of the string; capture group 1 is the raw value.
  $value_patterns = array(
    '/^"([^"]*)"(\s+|$)/',
    "/^'([^']*)'(\s+|$)/",
    "%^([^\s\"']+)(\s+|$)%",
  );

  while (strlen($attributes) != 0) {
    // Did the current state manage to consume something?
    $working = FALSE;

    switch ($mode) {
      case 0:
        // Attribute name, href for instance. The captured name must cover
        // exactly the text that is removed from the input, digits included,
        // otherwise a name such as "data-foo1" would be stored as "data-foo".
        if (preg_match('/^([-a-zA-Z][-a-zA-Z0-9]*)/', $attributes, $match)) {
          $attribute_name = strtolower($match[1]);
          $skip = (
            $attribute_name === 'style' ||
            strpos($attribute_name, 'on') === 0 ||
            strpos($attribute_name, '-') === 0 ||
            // Ignore long attributes to avoid unnecessary processing overhead.
            strlen($attribute_name) > 96
          );
          // Values for attributes of type URI should be filtered for
          // potentially malicious protocols (for example, an href-attribute
          // starting with "javascript:"). data-* attributes and the non-URI
          // attributes listed above are exempt.
          $skip_protocol_filtering = (strpos($attribute_name, 'data-') === 0)
            || in_array($attribute_name, $non_uri_attributes, TRUE);

          $working = TRUE;
          $mode = 1;
          // substr() returns FALSE on older PHP versions when the whole
          // string is consumed, hence the cast.
          $attributes = (string) substr($attributes, strlen($match[0]));
        }
        break;

      case 1:
        // Equals sign or valueless ("selected").
        if (preg_match('/^\s*=\s*/', $attributes, $match)) {
          $mode = 2;
          $working = TRUE;
          $attributes = (string) substr($attributes, strlen($match[0]));
          break;
        }

        if (preg_match('/^\s+/', $attributes, $match)) {
          $mode = 0;
          $working = TRUE;
          if (!$skip) {
            $attributes_array[$attribute_name] = $attribute_name;
          }
          $attributes = (string) substr($attributes, strlen($match[0]));
        }
        break;

      case 2:
        // Attribute value, a URL after href= for instance. All three value
        // syntaxes share the same handling so that the protocol-filtering
        // exemption does not depend on how the value was quoted.
        foreach ($value_patterns as $pattern) {
          if (!preg_match($pattern, $attributes, $match)) {
            continue;
          }
          // Strip bad protocols from URI-type attributes, e.g. href, src;
          // leave exempt attributes untouched.
          $value = $skip_protocol_filtering ? $match[1] : filter_xss_bad_protocol($match[1]);

          if (!$skip) {
            $attributes_array[$attribute_name] = $value;
          }
          $mode = 0;
          $working = TRUE;
          // Drop the value together with any whitespace that followed it.
          $attributes = (string) substr($attributes, strlen($match[0]));
          // Leaves the foreach only; the break below leaves the switch.
          break;
        }
        break;
    }

    if (!$working) {
      // Not well-formed; remove and try again.
      // NOTE(review): the pattern below contains an empty alternative (a "|"
      // immediately followed by another "|"). It is intentionally left
      // byte-identical here, since changing it would presumably alter how
      // much malformed input is discarded per pass - confirm before editing.
      $attributes = preg_replace('/
        ^
        (
        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
        |               # or
        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
        |               # or
        \S              # - a non-whitespace character
        )*              # any number of the above three
        \s*             # any number of whitespaces
        /x', '', $attributes);
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected". It is
  // keyed by name, exactly like a valueless attribute found mid-string.
  if ($mode == 1 && !$skip) {
    $attributes_array[$attribute_name] = $attribute_name;
  }
  return $attributes_array;
}