1 path.inc | path_clean_string($string, array $options = array()) |
Clean up a string segment to be used in a URL alias.
Performs the following possible alterations:
- Remove all HTML tags.
- Process the string through the transliteration module.
- Replace or remove punctuation with the separator character.
- Remove back-slashes.
- Replace non-ascii and non-numeric characters with the separator.
- Remove common words.
- Replace whitespace with the separator character.
- Trim duplicate, leading, and trailing separators.
- Convert to lower-case.
- Shorten to a desired length and logical position based on word boundaries.
This function should *not* be called on URL alias or path strings because it is assumed that they are already clean.
Parameters
$string: A string to clean.
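array $options: (optional) A keyed array of settings and flags to control the path pattern string replacement process. Supported options are:
- language: A language object; when its langcode property is non-empty it takes precedence over the langcode option.
- langcode: A language code to be used when transliterating the string. If neither option yields a language, the language of the current request is used.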
Return value
The cleaned string.
File
core/modules/path/path.inc, line 98 - Miscellaneous functions for Path module.
Code
/**
 * Clean up a string segment to be used in a URL alias.
 *
 * The string is stripped of HTML, optionally transliterated, has its
 * punctuation replaced or removed according to the Path settings, is
 * optionally reduced to ASCII letters and digits, has ignored words removed,
 * has whitespace replaced by the separator, has duplicate/leading/trailing
 * separators trimmed, is optionally lower-cased and is finally truncated on a
 * word boundary.
 *
 * Settings are read once from the 'path.settings' config and kept in a static
 * cache together with every result, keyed by language code and input string.
 *
 * @param string $string
 *   A string to clean.
 * @param array $options
 *   (optional) A keyed array of settings. Supported options are:
 *   - language: A language object; its langcode property is used when it is
 *     not empty.
 *   - langcode: A language code, used when no language object is provided.
 *   When neither yields a language (or it is LANGUAGE_NONE), the langcode of
 *   the global $language is used.
 *
 * @return string
 *   The cleaned string. An empty string is returned for '' or NULL input.
 */
function path_clean_string($string, array $options = array()) {
  // Use the advanced backdrop_static() pattern, since this is called very often.
  static $backdrop_static_fast;
  if (!isset($backdrop_static_fast)) {
    $backdrop_static_fast['cache'] = &backdrop_static(__FUNCTION__);
  }
  $cache = &$backdrop_static_fast['cache'];

  // Generate and cache variables used in this function so that on the second
  // call to path_clean_string() we focus on processing.
  if (!isset($cache)) {
    $config = config('path.settings');
    $cache = array(
      'separator' => $config->get('separator'),
      'strings' => array(),
      'transliterate' => $config->get('transliterate'),
      'punctuation' => array(),
      'reduce_ascii' => (bool) $config->get('reduce_ascii'),
      'ignore_words_regex' => FALSE,
      'lowercase' => (bool) $config->get('case'),
      'maxlength' => min($config->get('max_component_length'), _path_get_schema_alias_maxlength()),
    );

    // Generate and cache the punctuation replacements for strtr().
    $punctuation = path_punctuation_chars();
    foreach ($punctuation as $name => $details) {
      $action = $config->get('punctuation_' . $name);
      switch ($action) {
        case PATH_PUNCTUATION_REMOVE:
          $cache['punctuation'][$details['value']] = '';
          break;

        case PATH_PUNCTUATION_REPLACE:
          $cache['punctuation'][$details['value']] = $cache['separator'];
          break;

        case PATH_PUNCTUATION_DO_NOTHING:
          // Literally do nothing: no entry is added, so strtr() leaves the
          // character untouched.
          break;
      }
    }

    // Copy settings from the plain hyphen, single quote and double quote to
    // their typographic ("fancy") variants.
    $fancy_character_map = array(
      '‘' => '\'', // Single opening curly quote.
      '’' => '\'', // Single closing curly quote.
      '‚' => '\'', // Single low-quote.
      '′' => '\'', // Single prime.
      '“' => '"', // Double opening curly quote.
      '”' => '"', // Double closing curly quote.
      '„' => '"', // Double low-quote.
      '″' => '"', // Double prime.
      '–' => '-', // En dash.
      '—' => '-', // Em dash.
    );
    foreach ($fancy_character_map as $fancy_character => $simple_character) {
      // Only inherit when the plain character actually has a replacement. A
      // plain character set to "do nothing" has no entry, and copying a
      // missing entry would raise a notice and store NULL, which strtr()
      // treats as "remove".
      if (!isset($cache['punctuation'][$fancy_character]) && isset($cache['punctuation'][$simple_character])) {
        $cache['punctuation'][$fancy_character] = $cache['punctuation'][$simple_character];
      }
    }

    // Generate and cache the ignored words regular expression. The first
    // pattern trims leading/trailing commas and whitespace; the second turns
    // every remaining run of separators into a word-boundary alternation.
    $ignore_words = $config->get('ignore_words');
    $ignore_words_regex = preg_replace(array('/^[,\s]+|[,\s]+$/', '/[,\s]+/'), array('', '\b|\b'), $ignore_words);
    if ($ignore_words_regex) {
      $cache['ignore_words_regex'] = '\b' . $ignore_words_regex . '\b';
      if (function_exists('mb_eregi_replace')) {
        // mb_eregi_replace() takes the pattern without delimiters.
        $cache['ignore_words_callback'] = 'mb_eregi_replace';
      }
      else {
        $cache['ignore_words_callback'] = 'preg_replace';
        $cache['ignore_words_regex'] = '/' . $cache['ignore_words_regex'] . '/i';
      }
    }

    // Remove to prevent any unintentional use of $config outside of the cache.
    unset($config);
  }

  // Empty strings do not need any processing.
  if ($string === '' || $string === NULL) {
    return '';
  }

  // Resolve the language: a language object wins over a plain langcode.
  $langcode = LANGUAGE_NONE;
  if (!empty($options['language']->langcode)) {
    $langcode = $options['language']->langcode;
  }
  elseif (!empty($options['langcode'])) {
    $langcode = $options['langcode'];
  }

  if ($langcode == LANGUAGE_NONE) {
    // Paths for language neutral content get transliterated according to
    // current language.
    global $language;
    // We are intentionally not using config_get('system.core', 'language_default')
    // here. That can have unexpected behavior in cases such as on a multilingual
    // site with language-neutral content types.
    $langcode = $language->langcode;
  }

  // Check if the string has already been processed, and if so return the
  // cached result.
  if (isset($cache['strings'][$langcode][$string])) {
    return $cache['strings'][$langcode][$string];
  }

  // Remove all HTML tags from the string.
  $output = strip_tags(decode_entities($string));

  // Optionally transliterate.
  if ($cache['transliterate']) {
    // If the reduce strings to letters and numbers is enabled, don't bother
    // replacing unknown characters with a question mark. Use an empty string
    // instead.
    include_once BACKDROP_ROOT . '/core/includes/transliteration.inc';
    $output = transliteration_get($output, $cache['reduce_ascii'] ? '' : '?', $langcode);
  }

  // Replace or drop punctuation based on user settings.
  $output = strtr($output, $cache['punctuation']);

  // Reduce strings to letters and numbers. Forward slashes are kept.
  if ($cache['reduce_ascii']) {
    $output = preg_replace('/[^a-zA-Z0-9\/]+/', $cache['separator'], $output);
  }

  // Get rid of words that are on the ignore list, unless that would leave
  // nothing but whitespace.
  if ($cache['ignore_words_regex']) {
    $words_removed = $cache['ignore_words_callback']($cache['ignore_words_regex'], '', $output);
    if (backdrop_strlen(trim($words_removed)) > 0) {
      $output = $words_removed;
    }
  }

  // Always replace whitespace with the separator.
  $output = preg_replace('/\s+/', $cache['separator'], $output);

  // Trim duplicates and remove trailing and leading separators.
  $output = _path_clean_separators($output, $cache['separator']);

  // Optionally convert to lower case.
  if ($cache['lowercase']) {
    $output = backdrop_strtolower($output);
  }

  // Shorten to a logical place based on word boundaries.
  $output = truncate_utf8($output, $cache['maxlength'], TRUE);

  // Cache this result in the static array.
  $cache['strings'][$langcode][$string] = $output;

  return $output;
}