@@ -56,41 +56,72 @@ class LexerSplitter {
protected static $splitters = array(
    // Order matters: the regex built from this list is an alternation joined
    // in array order, and alternatives are tried left to right. Every
    // multi-character splitter must therefore appear before the shorter
    // splitters it starts with ("<=>" before "<=", "<=" before "<", ...).
    "<=>", "\r\n", "!=", ">=", "<=", "<>", "<<", ">>", ":=", "\\", "&&", "||",
    "/*", "*/", "--", ">", "<", "|", "=", "^", "(", ")", "\t", "\n", "'", "\"", "`",
    ",", "@", " ", "+", "-", "*", "/", ";"
);
59- protected $ tokenSize ;
60- protected $ hashSet ;
59+
60+ /**
61+ * @var string Regex string pattern of splitters.
62+ */
63+ protected $ splitterPattern ;
6164
/**
 * Constructor.
 *
 * Builds the regex pattern for the class-level splitter list once and
 * caches it on the instance.
 */
public function __construct() {
    $pattern = $this->convertSplittersToRegexPattern(self::$splitters);
    $this->splitterPattern = $pattern;
}
7173
72- /**
73- * Get the maximum length of a split token.
74- *
75- * The largest element must be on position 0 of the internal $_splitters array,
76- * so the function returns the length of that token. It must be > 0.
77- *
78- * @return int The number of characters for the largest split token.
79- */
80- public function getMaxLengthOfSplitter () {
81- return $ this ->tokenSize ;
/**
 * Return the regex pattern that matches any of the splitters.
 *
 * The pattern is the one stored on the instance by the constructor.
 *
 * @return string The cached splitter pattern.
 */
public function getSplittersRegexPattern() {
    $pattern = $this->splitterPattern;

    return $pattern;
}
8382
84- /**
85- * Looks into the internal split token array and compares the given token with
86- * the array content. It returns true, if the token will be found, false otherwise.
87- *
88- * @param String $token a string, which could be a split token.
89- *
90- * @return boolean true, if the given string will be a split token, false otherwise
91- */
92- public function isSplitter ($ token ) {
93- return isset ($ this ->hashSet [$ token ]);
/**
 * Convert an array of splitter tokens to a regex pattern string.
 *
 * Every token is escaped with preg_quote() using "/" as the delimiter, so a
 * token containing a slash (not only "/", "/*" and "*" . "/") is safe inside
 * the returned "/.../" pattern. Whitespace tokens are emitted as symbolic
 * escapes; note that a single space becomes \s, which matches any
 * whitespace character, not just a space.
 *
 * The alternatives are joined in the order given. Alternation tries them
 * left to right, so longer tokens should precede shorter tokens that are
 * their prefixes.
 *
 * @param array $splitters List of literal splitter strings.
 *
 * @return string Delimited pattern of the form "/(a|b|...)/" with a single
 *                capturing group around the alternation.
 */
public function convertSplittersToRegexPattern($splitters) {
    // Whitespace tokens get a readable escape sequence instead of a raw byte.
    $whitespace = array(
        "\r\n" => '\r\n',
        "\t"   => '\t',
        "\n"   => '\n',
        " "    => '\s',
    );

    $regex_parts = array();
    foreach ($splitters as $part) {
        if (isset($whitespace[$part])) {
            $regex_parts[] = $whitespace[$part];
            continue;
        }

        // Passing the delimiter makes preg_quote() escape "/" as well.
        $regex_parts[] = preg_quote($part, '/');
    }

    return '/(' . implode('|', $regex_parts) . ')/';
}
95126}
96127
0 commit comments