Skip to content

Commit bc4e502

Browse files
committed
Use regex to split the SQL statement into tokens.
This is more performant than looping through the characters and making many substr() calls. Comparison profile of before and after the PR: https://blackfire.io/profiles/compare/b7b589a4-eee8-4c7c-adb5-d8ae6a3d7dde/graph Before: https://blackfire.io/profiles/1ce2dcd6-b98c-4fba-996a-3c9a552de38d/graph After: https://blackfire.io/profiles/8421e790-e374-45fd-8bca-b5219e752730/graph
1 parent d43cd12 commit bc4e502

2 files changed

Lines changed: 56 additions & 57 deletions

File tree

src/PHPSQLParser/lexer/LexerSplitter.php

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -56,41 +56,72 @@ class LexerSplitter {
5656
protected static $splitters = array("<=>", "\r\n", "!=", ">=", "<=", "<>", "<<", ">>", ":=", "\\", "&&", "||", ":=",
5757
"/*", "*/", "--", ">", "<", "|", "=", "^", "(", ")", "\t", "\n", "'", "\"", "`",
5858
",", "@", " ", "+", "-", "*", "/", ";");
59-
protected $tokenSize;
60-
protected $hashSet;
59+
60+
/**
61+
* @var string Regex string pattern of splitters.
62+
*/
63+
protected $splitterPattern;
6164

6265
/**
6366
* Constructor.
6467
*
6568
* It initializes some fields.
6669
*/
6770
public function __construct() {
68-
$this->tokenSize = strlen(self::$splitters[0]); // should be the largest one
69-
$this->hashSet = array_flip(self::$splitters);
71+
$this->splitterPattern = $this->convertSplittersToRegexPattern( self::$splitters );
7072
}
7173

72-
/**
73-
* Get the maximum length of a split token.
74-
*
75-
* The largest element must be on position 0 of the internal $_splitters array,
76-
* so the function returns the length of that token. It must be > 0.
77-
*
78-
* @return int The number of characters for the largest split token.
79-
*/
80-
public function getMaxLengthOfSplitter() {
81-
return $this->tokenSize;
74+
/**
 * Return the regular expression pattern held in $splitterPattern.
 *
 * @return string The stored splitter pattern.
 */
public function getSplittersRegexPattern() {
    $pattern = $this->splitterPattern;

    return $pattern;
}
8382

84-
/**
85-
* Looks into the internal split token array and compares the given token with
86-
* the array content. It returns true, if the token will be found, false otherwise.
87-
*
88-
* @param String $token a string, which could be a split token.
89-
*
90-
* @return boolean true, if the given string will be a split token, false otherwise
91-
*/
92-
public function isSplitter($token) {
93-
return isset($this->hashSet[$token]);
83+
/**
 * Convert an array of splitter tokens to a regex pattern string.
 *
 * Every token is escaped with preg_quote() using "/" as the delimiter, so a
 * token that contains a slash can never terminate the pattern early. The
 * escaped tokens are joined with "|" in the order given and wrapped in one
 * capture group. Regex alternation tries alternatives from left to right, so
 * a longer token has to be listed before any token that is a prefix of it
 * (for example "<=>" before "<=").
 *
 * @param array $splitters List of literal splitter strings.
 *
 * @return string Delimited pattern of the form "/(a|b|...)/".
 */
public function convertSplittersToRegexPattern( $splitters ) {
    // Whitespace tokens are written as regex escape sequences instead of raw
    // control characters, which keeps the generated pattern printable.
    // NOTE: a single space is mapped to \s, which matches any whitespace
    // character and not only " ".
    $whitespace = array(
        "\r\n" => '\r\n',
        "\t"   => '\t',
        "\n"   => '\n',
        " "    => '\s',
    );

    $regex_parts = array();
    foreach ( $splitters as $part ) {
        // Passing the delimiter makes preg_quote() escape "/" as well, which
        // covers "/", "/*", "*/" and any other token containing a slash.
        $regex_parts[] = isset( $whitespace[$part] )
            ? $whitespace[$part]
            : preg_quote( $part, '/' );
    }

    return '/(' . implode( '|', $regex_parts ) . ')/';
}
95126
}
96127

src/PHPSQLParser/lexer/PHPSQLLexer.php

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -85,39 +85,7 @@ public function split($sql) {
8585
throw new InvalidParameterException($sql);
8686
}
8787

88-
$tokens = array();
89-
$token = "";
90-
91-
$splitLen = $this->splitters->getMaxLengthOfSplitter();
92-
$found = false;
93-
$len = strlen($sql);
94-
$pos = 0;
95-
96-
while ($pos < $len) {
97-
98-
for ($i = $splitLen; $i > 0; $i--) {
99-
$substr = substr($sql, $pos, $i);
100-
if ($this->splitters->isSplitter($substr)) {
101-
102-
if ($token !== "") {
103-
$tokens[] = $token;
104-
}
105-
106-
$tokens[] = $substr;
107-
$pos += $i;
108-
$token = "";
109-
110-
continue 2;
111-
}
112-
}
113-
114-
$token .= $sql[$pos];
115-
$pos++;
116-
}
117-
118-
if ($token !== "") {
119-
$tokens[] = $token;
120-
}
88+
$tokens = preg_split($this->splitters->getSplittersRegexPattern(), $sql, null, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
12189

12290
$tokens = $this->concatEscapeSequences($tokens);
12391
$tokens = $this->balanceBackticks($tokens);

0 commit comments

Comments
 (0)