wishthis/vendor/phpcsstandards/phpcsutils/PHPCSUtils/Utils/TextStrings.php

332 lines
13 KiB
PHP
Raw Normal View History

2023-09-19 17:26:46 +00:00
<?php
/**
* PHPCSUtils, utility functions and classes for PHP_CodeSniffer sniff developers.
*
* @package PHPCSUtils
* @copyright 2019-2020 PHPCSUtils Contributors
* @license https://opensource.org/licenses/LGPL-3.0 LGPL3
* @link https://github.com/PHPCSStandards/PHPCSUtils
*/
namespace PHPCSUtils\Utils;
use PHP_CodeSniffer\Exceptions\RuntimeException;
use PHP_CodeSniffer\Files\File;
use PHP_CodeSniffer\Util\Tokens;
use PHPCSUtils\Internal\Cache;
use PHPCSUtils\Internal\NoFileCache;
use PHPCSUtils\Tokens\Collections;
use PHPCSUtils\Utils\GetTokensAsString;
/**
* Utility functions for working with text string tokens.
*
* @since 1.0.0
*/
final class TextStrings
{
/**
 * Regex to match the start of an embedded variable/expression.
 *
 * The pattern is built up out of three parts:
 * - A negative lookbehind, which rejects a position directly preceded by a backslash.
 * - Capture group 1: an optional run of backslash pairs. An escaped backslash does
 *   not escape whatever comes after it.
 * - Capture group 2: the actual start of the embed, being either `{$`, `${` or a `$`
 *   which is directly followed by a character which is valid as the first character
 *   of a variable name.
 *
 * The first two parts combined prevent matching escaped variables/expressions.
 *
 * @since 1.0.0
 *
 * @var string
 */
const START_OF_EMBED = '`(?<!\\\\)(\\\\{2})*(\{\$|\$\{|\$(?=[a-zA-Z_\x7f-\xff]))`';
/**
 * Regex to match a "type 1" - directly embedded - variable without the dollar sign.
 *
 * Allows for array access and property access in as far as supported (single level).
 *
 * The named group `varname` matches the variable name itself. The name can optionally be
 * followed by one of the following:
 * - An object operator (`->` or `?->`) and a property name, which is matched by re-using
 *   the `varname` sub-pattern.
 * - A single set of square brackets, the content of which may not contain a closing
 *   square bracket, a quote character or whitespace.
 *
 * @since 1.0.0
 *
 * @var string
 */
const TYPE1_EMBED_AFTER_DOLLAR =
'`(?P<varname>[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)(?:\??->(?P>varname)|\[[^\]\'"\s]+\])?`';
/**
 * Get the complete contents of a - potentially multi-line - text string.
 *
 * PHPCS tokenizes multi-line text strings with a single token for each line.
 * This method can be used to retrieve the text string as it would be received and
 * processed in PHP itself.
 *
 * This method is particularly useful for sniffs which examine the contents of text strings,
 * where the content matching might result in false positives/false negatives if the text
 * were to be examined line by line.
 *
 * For a heredoc/nowdoc, the opener token itself is not included in the returned contents,
 * the `$stripQuotes` parameter is ignored and exactly one line break - the one directly
 * before the closer - is removed from the end of the contents. Any blank lines before
 * that final line break are part of the text and are preserved.
 *
 * @since 1.0.0
 *
 * @param \PHP_CodeSniffer\Files\File $phpcsFile   The file where this token was found.
 * @param int                         $stackPtr    Pointer to the first text string token
 *                                                 of a - potentially multi-line - text string
 *                                                 or to a Nowdoc/Heredoc opener.
 * @param bool                        $stripQuotes Optional. Whether to strip text delimiter
 *                                                 quotes off the resulting text string.
 *                                                 Defaults to `true`.
 *
 * @return string The contents of the complete text string.
 *
 * @throws \PHP_CodeSniffer\Exceptions\RuntimeException If the specified position is not a
 *                                                      valid text string token.
 * @throws \PHP_CodeSniffer\Exceptions\RuntimeException If the specified token is not the _first_
 *                                                      token in a text string.
 */
public static function getCompleteTextString(File $phpcsFile, $stackPtr, $stripQuotes = true)
{
    $tokens = $phpcsFile->getTokens();

    // Validates $stackPtr (throws on invalid input), so the token can be safely accessed afterwards.
    $end = self::getEndOfCompleteTextString($phpcsFile, $stackPtr);

    $isHereNowdoc = ($tokens[$stackPtr]['code'] === \T_START_HEREDOC
        || $tokens[$stackPtr]['code'] === \T_START_NOWDOC);

    if ($isHereNowdoc === true) {
        // The opener is not part of the text, the contents start at the next token.
        $stackPtr = ($stackPtr + 1);
    }

    $contents = GetTokensAsString::normal($phpcsFile, $stackPtr, $end);

    if ($isHereNowdoc === true) {
        /*
         * Heredoc/nowdoc: strip the new line at the end of the string to emulate how PHP sees the string.
         * Only the one line break directly before the closer is removed, earlier blank lines are text.
         * The string cast guards against `substr()` returning `false` on older PHP versions.
         */
        if (\substr($contents, -2) === "\r\n") {
            $contents = (string) \substr($contents, 0, -2);
        } elseif (\substr($contents, -1) === "\n" || \substr($contents, -1) === "\r") {
            $contents = (string) \substr($contents, 0, -1);
        }

        // Heredocs/nowdocs have no quote delimiters, so there is nothing to strip.
        return $contents;
    }

    if ($stripQuotes === true) {
        return self::stripQuotes($contents);
    }

    return $contents;
}
/**
 * Find the stack pointer to the last token of a - potentially multi-line - text string.
 *
 * The result is cached per file and stack pointer. Input validation is always executed,
 * independently of whether a cached result is available.
 *
 * @see \PHPCSUtils\Utils\TextStrings::getCompleteTextString() Retrieve the contents of a complete - potentially
 *                                                             multi-line - text string.
 *
 * @since 1.0.0
 *
 * @param \PHP_CodeSniffer\Files\File $phpcsFile The file where this token was found.
 * @param int                         $stackPtr  Pointer to the first text string token
 *                                               of a - potentially multi-line - text string
 *                                               or to a Nowdoc/Heredoc opener.
 *
 * @return int Stack pointer to the last token in the text string.
 *
 * @throws \PHP_CodeSniffer\Exceptions\RuntimeException If the specified position is not a
 *                                                      valid text string token.
 * @throws \PHP_CodeSniffer\Exceptions\RuntimeException If the specified token is not the _first_
 *                                                      token in a text string.
 */
public static function getEndOfCompleteTextString(File $phpcsFile, $stackPtr)
{
    $tokens = $phpcsFile->getTokens();

    // The pointer has to exist and has to point to a token which can start a text string.
    if (isset($tokens[$stackPtr]) === false
        || isset(Collections::textStringStartTokens()[$tokens[$stackPtr]['code']]) === false
    ) {
        throw new RuntimeException(
            '$stackPtr must be of type T_START_HEREDOC, T_START_NOWDOC, T_CONSTANT_ENCAPSED_STRING'
            . ' or T_DOUBLE_QUOTED_STRING'
        );
    }

    $startCode = $tokens[$stackPtr]['code'];

    // For quoted strings: a preceding token of the same type means we are not at the start.
    if (isset(Tokens::$stringTokens[$startCode]) === true) {
        $prevNonWhitespace = $phpcsFile->findPrevious(\T_WHITESPACE, ($stackPtr - 1), null, true);
        if ($tokens[$prevNonWhitespace]['code'] === $startCode) {
            throw new RuntimeException('$stackPtr must be the start of the text string');
        }
    }

    if (Cache::isCached($phpcsFile, __METHOD__, $stackPtr) === true) {
        return Cache::get($phpcsFile, __METHOD__, $stackPtr);
    }

    // Determine which token type makes up the text and where the text starts.
    if ($startCode === \T_START_HEREDOC) {
        $contentType = \T_HEREDOC;
        $lastPtr     = $stackPtr;
    } elseif ($startCode === \T_START_NOWDOC) {
        $contentType = \T_NOWDOC;
        $lastPtr     = $stackPtr;
    } else {
        $contentType = $startCode;
        $lastPtr     = ($stackPtr - 1);
    }

    // Walk forward for as long as the next token is still part of the text.
    while (isset($tokens[($lastPtr + 1)]) === true && $tokens[($lastPtr + 1)]['code'] === $contentType) {
        ++$lastPtr;
    }

    Cache::set($phpcsFile, __METHOD__, $stackPtr, $lastPtr);

    return $lastPtr;
}
/**
 * Strip text delimiter quotes from an arbitrary text string.
 *
 * Intended for use with the "content" of a `T_CONSTANT_ENCAPSED_STRING` / `T_DOUBLE_QUOTED_STRING`.
 *
 * - Prevents stripping mis-matched quotes.
 * - Prevents stripping quotes from the textual content of the text string.
 * - Returns the text string unchanged if the regular expression engine runs into an error.
 *
 * @since 1.0.0
 *
 * @param string $textString The raw text string.
 *
 * @return string Text string without quotes around it.
 */
public static function stripQuotes($textString)
{
    $stripped = \preg_replace('`^([\'"])(.*)\1$`Ds', '$2', $textString);

    if ($stripped === null) {
        /*
         * `preg_replace()` returns `null` when PCRE errors out (for instance: when the backtrack
         * limit is hit on a very long text without a matching end quote). Nothing was stripped
         * in that case, so return the original text to honour the documented return type.
         */
        return (string) $textString;
    }

    return $stripped;
}
/**
 * Retrieve the variables/expressions which are embedded in an arbitrary string.
 *
 * Note: the embeds are returned _as they are embedded_ in the text, i.e. complete with
 * potential curly brace wrappers, array access, method calls etc.
 *
 * @since 1.0.0
 *
 * @param string $text The contents of a T_DOUBLE_QUOTED_STRING or T_HEREDOC token.
 *
 * @return array<int, string> Array of encountered variable names/expressions with the offset at which
 *                            the variable/expression was found in the string, as the key.
 */
public static function getEmbeds($text)
{
    $split = self::getStripEmbeds($text);

    return $split['embeds'];
}
/**
 * Remove the embedded variables/expressions from an arbitrary string.
 *
 * @since 1.0.0
 *
 * @param string $text The contents of a T_DOUBLE_QUOTED_STRING or T_HEREDOC token.
 *
 * @return string String without variables/expressions in it.
 */
public static function stripEmbeds($text)
{
    $split = self::getStripEmbeds($text);

    return $split['remaining'];
}
/**
 * Split an arbitrary text string into embedded variables/expressions and remaining text.
 *
 * PHP contains four types of embedding syntaxes:
 * 1. Directly embedding variables ("$foo");
 * 2. Braces outside the variable ("{$foo}");
 * 3. Braces after the dollar sign ("${foo}");
 * 4. Variable variables ("${expr}", equivalent to (string) ${expr}).
 *
 * Type 3 and 4 are deprecated as of PHP 8.2 and will be removed in PHP 9.0.
 *
 * This method handles all types of embeds, including recognition of whether an embed is escaped or not.
 *
 * The end of a brace-delimited embed is found by counting curly braces. When no balancing
 * close brace is found before the end of the text (for instance: when only one line of a
 * multi-line text string is passed), the text from the start of that embed onwards is
 * treated as plain text and will be included in the "remaining" text.
 *
 * Results for texts containing a dollar sign are cached based on a hash of the text.
 *
 * @link https://www.php.net/language.types.string#language.types.string.parsing PHP Manual on string parsing
 * @link https://wiki.php.net/rfc/deprecate_dollar_brace_string_interpolation PHP RFC on deprecating select
 *       string interpolation syntaxes
 *
 * @since 1.0.0
 *
 * @param string $text The contents of a T_DOUBLE_QUOTED_STRING or T_HEREDOC token.
 *
 * @return array<string, mixed> Array containing two values:
 *                              1. An array containing a string representation of each embed encountered.
 *                                 The keys in this array are the integer offset within the original string
 *                                 where the embed was found.
 *                              2. The textual contents, embeds stripped out of it.
 *                              The format of the array return value is:
 *                              ```php
 *                              array(
 *                                'embeds'    => array<int, string>,
 *                                'remaining' => string,
 *                              )
 *                              ```
 */
public static function getStripEmbeds($text)
{
    // Without a dollar sign, there cannot be any embeds. Bow out early.
    if (\strpos($text, '$') === false) {
        return [
            'embeds'    => [],
            'remaining' => $text,
        ];
    }

    $textHash = \md5($text);
    if (NoFileCache::isCached(__METHOD__, $textHash) === true) {
        return NoFileCache::get(__METHOD__, $textHash);
    }

    $offset    = 0;
    $strLen    = \strlen($text); // Use iconv ?
    $stripped  = '';
    $variables = [];

    while (\preg_match(self::START_OF_EMBED, $text, $matches, \PREG_OFFSET_CAPTURE, $offset) === 1) {
        // Capture group 2 contains the embed opener, i.e. `$`, `{$` or `${`.
        $matchedExpr   = $matches[2][0];
        $matchedOffset = $matches[2][1];

        // Everything between the previous embed and this one is plain text.
        $stripped .= \substr($text, $offset, ($matchedOffset - $offset));

        $braces    = \substr_count($matchedExpr, '{');
        $newOffset = $matchedOffset + \strlen($matchedExpr);

        if ($braces === 0) {
            /*
             * Type 1: simple variable embed.
             * Regex will always return a match due to the look ahead in the above regex.
             */
            \preg_match(self::TYPE1_EMBED_AFTER_DOLLAR, $text, $endMatch, 0, $newOffset);
            $matchedExpr              .= $endMatch[0];
            $variables[$matchedOffset] = $matchedExpr;
            $offset                    = $newOffset + \strlen($endMatch[0]);
            continue;
        }

        // Type 2, 3 and 4: walk the text until the brace which opened the embed has been closed.
        $embedClosed = false;
        for (; $newOffset < $strLen; $newOffset++) {
            if ($text[$newOffset] === '{') {
                ++$braces;
                continue;
            }

            if ($text[$newOffset] === '}') {
                --$braces;
                if ($braces === 0) {
                    $matchedExpr               = \substr($text, $matchedOffset, (1 + $newOffset - $matchedOffset));
                    $variables[$matchedOffset] = $matchedExpr;
                    $offset                    = ($newOffset + 1);
                    $embedClosed               = true;
                    break;
                }
            }
        }

        if ($embedClosed === false) {
            /*
             * The end of the text was reached without the embed being closed.
             * Without this bail out, the search offset would never move forward and the
             * same embed opener would be matched over and over again (infinite loop).
             * Treat the text from the embed opener onwards as plain text.
             */
            $offset = $matchedOffset;
            break;
        }
    }

    if ($offset < $strLen) {
        // Add the end of the string.
        $stripped .= \substr($text, $offset);
    }

    $returnValue = [
        'embeds'    => $variables,
        'remaining' => $stripped,
    ];

    NoFileCache::set(__METHOD__, $textHash, $returnValue);

    return $returnValue;
}
}