/*
This module implements the Mildew RegExp class.

────────────────────────────────────────────────────────────────────────────────

Copyright (C) 2021 pillager86.rf.gd

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>.
*/
module mildew.stdlib.regexp;

static import std.regex;

import mildew.environment;
import mildew.interpreter;
import mildew.types;

/**
 * Class that encapsulates regular expressions. The D struct Regex cannot be directly stored in a ScriptObject.
 *
 * Besides the compiled expression, the object remembers the pattern source, the flags (sorted by
 * character) and the scanning state used by successive exec calls.
 */
class ScriptRegExp
{
public:
    /**
     * Compiles pattern with the given flags.
     * Params:
     *  pattern = The regular expression source, passed unchanged to std.regex.regex.
     *  flags = Flag characters, passed unchanged to std.regex.regex; a sorted copy is kept for
     *          the flags property and toString.
     */
    this(in string pattern, in string flags="")
    {
        import std.algorithm.sorting : sort;
        import std.string : representation;

        _regex = std.regex.regex(pattern, flags);
        _source = pattern;

        // Sort the raw code units: a char[] is not a random access range, its ubyte[] view is.
        char[] sortedFlags = flags.dup;
        sort(sortedFlags.representation);
        _flags = cast(string)sortedFlags;
    }

    /// flags property (the flag characters in sorted order)
    string flags() const { return _flags; }

    /// last index property: offset at which the next exec of a global expression resumes
    size_t lastIndex() const { return _lastIndex; }
    /// last index property
    size_t lastIndex(size_t li)
    {
        return _lastIndex = li;
    }

    /// source property (the pattern as given to the constructor)
    string source() const { return _source; }

    /// whether or not 's' flag was used
    bool dotAll() const { return hasFlag('s'); }

    /// whether or not 'g' flag was used
    bool global() const { return hasFlag('g'); }

    /// whether or not 'i' flag was used
    bool ignoreCase() const { return hasFlag('i'); }

    /// whether or not 'm' flag was used
    bool multiline() const { return hasFlag('m'); }

    /// returns the matched text of every match that std.regex.match yields for str
    auto match(string str)
    {
        string[] hits;
        foreach(found ; std.regex.match(str, _regex))
            hits ~= found.hit;
        return hits;
    }

    /// matchAll - The Script will implement this as an iterator once generators are a thing
    auto matchAll(string str)
    {
        return std.regex.matchAll(str, _regex);
    }

    /// replace: every occurrence when the 'g' flag is set, otherwise only the first one
    auto replace(string str, string fmt)
    {
        if(global)
            return std.regex.replaceAll(str, _regex, fmt);
        else
            return std.regex.replaceFirst(str, _regex, fmt);
    }

    /// replace only the first occurrence.
    auto replaceFirst(string str, string fmt)
    {
        string replaced = std.regex.replaceFirst(str, _regex, fmt);
        return replaced;
    }

    /**
     * Finds the position of the first match.
     * Returns: The offset (in code units) of the first match, or -1 when str contains no match.
     */
    ptrdiff_t search(string str)
    {
        auto found = std.regex.matchFirst(str, _regex);
        // Without this check "no match" would be reported as str.length, because pre is then
        // the entire input.
        if(found.empty)
            return -1;
        return cast(ptrdiff_t)found.pre.length;
    }

    /// split str around every match of the expression
    auto split(string str)
    {
        return std.regex.split(str, _regex);
    }

    /**
     * Executes the expression against str.
     *
     * Only expressions with the 'g' flag are stateful: calling exec repeatedly with the same
     * string resumes after the previous match, and lastIndex is reset to 0 once no further match
     * is found. Expressions without the 'g' flag always search from the start and leave
     * lastIndex untouched.
     * Returns: The whole match followed by the capture groups, or an empty array when there is
     *          no match or str is empty.
     */
    string[] exec(string str)
    {
        if(str.length < 1)
            return [];

        immutable stateful = global;
        size_t start = 0;
        if(stateful)
        {
            if(str != _currentExec)
            {
                // a different subject string restarts the scan
                _currentExec = str;
                _lastIndex = 0;
            }
            else if(_lastIndex >= str.length)
            {
                // the previous scan consumed the whole string
                _lastIndex = 0;
                return [];
            }
            start = _lastIndex;
        }

        auto mat = std.regex.matchFirst(str[start..$], _regex);
        if(mat.empty)
        {
            if(stateful)
                _lastIndex = 0;
            return [];
        }

        // The match was found in a slice, so the text skipped before the hit has to be counted
        // as well to obtain the offset inside str.
        // NOTE: a zero-length match does not advance lastIndex.
        if(stateful)
            _lastIndex = start + mat.pre.length + mat.hit.length;

        string[] result;
        foreach(value ; mat)
            result ~= value;
        return result;
    }

    /// test: true when exec finds a match in str (see exec for the effect on lastIndex)
    bool test(string str)
    {
        return exec(str).length > 0;
    }

    /// get the string representation
    override string toString() const
    {
        return "/" ~ _source ~ "/" ~ _flags;
    }

private:
    /// whether flag is among the flags this expression was created with
    bool hasFlag(char flag) const
    {
        foreach(ch ; _flags)
            if(ch == flag) return true;
        return false;
    }

    string _currentExec; // subject of the running exec scan; a different string restarts it
    size_t _lastIndex;

    string _source; // keep track of source
    string _flags; // keep track of flags
    std.regex.Regex!char _regex;
}

/// Initializes the RegExp namespace. Not necessary if regex literals are used.
/// Creates the "RegExp" constructor function, attaches the shared prototype to it and registers
/// it with the interpreter as the global "RegExp".
void initializeRegExpLibrary(Interpreter interpreter)
{
    ScriptAny ctor = new ScriptFunction("RegExp", &native_RegExp_ctor, true);
    ctor["prototype"] = getRegExpProto();
    ctor["prototype"]["constructor"] = ctor;

    interpreter.forceSetGlobal("RegExp", ctor, false);
}

/// Get the RegExp prototype. This is public because the VM needs it.
/// The prototype object is built on first use and the same instance is returned afterwards.
ScriptObject getRegExpProto()
{
    if(_regExpProto is null)
    {
        _regExpProto = new ScriptObject("RegExp", null);

        _regExpProto.addGetterProperty("flags", new ScriptFunction("RegExp.prototype.flags", &native_RegExp_p_flags));
        // the same native function serves as getter and setter; it distinguishes by argument count
        _regExpProto.addGetterProperty("lastIndex", new ScriptFunction("RegExp.prototype.lastIndex",
                &native_RegExp_p_lastIndex));
        _regExpProto.addSetterProperty("lastIndex", new ScriptFunction("RegExp.prototype.lastIndex",
                &native_RegExp_p_lastIndex));
        _regExpProto.addGetterProperty("source", new ScriptFunction("RegExp.prototype.source",
                &native_RegExp_p_source));

        _regExpProto["dotAll"] = new ScriptFunction("RegExp.prototype.dotAll", &native_RegExp_dotAll);
        _regExpProto["global"] = new ScriptFunction("RegExp.prototype.global", &native_RegExp_global);
        _regExpProto["ignoreCase"] = new ScriptFunction("RegExp.prototype.ignoreCase", &native_RegExp_ignoreCase);
        _regExpProto["multiline"] = new ScriptFunction("RegExp.prototype.multiline", &native_RegExp_multiline);

        _regExpProto["match"] = new ScriptFunction("RegExp.prototype.match", &native_RegExp_match);
        // TODO matchAll
        _regExpProto["replace"] = new ScriptFunction("RegExp.prototype.replace", &native_RegExp_replace);
        _regExpProto["search"] = new ScriptFunction("RegExp.prototype.search", &native_RegExp_search);
        _regExpProto["split"] = new ScriptFunction("RegExp.prototype.split", &native_RegExp_split);
        _regExpProto["exec"] = new ScriptFunction("RegExp.prototype.exec", &native_RegExp_exec);
        _regExpProto["test"] = new ScriptFunction("RegExp.prototype.test", &native_RegExp_test);
    }
    return _regExpProto;
}

// lazily created by getRegExpProto
private ScriptObject _regExpProto;

/// Native RegExp constructor: args[0] is the pattern, the optional args[1] the flags. The new
/// ScriptRegExp is stored as the native object of thisObj. A RegexException raised while
/// compiling is reported to the script through its message.
ScriptAny native_RegExp_ctor(Environment env, ScriptAny* thisObj, ScriptAny[] args, ref NativeFunctionError nfe)
{
    if(!thisObj.isObject)
        return ScriptAny.UNDEFINED;
    auto obj = thisObj.toValue!ScriptObject;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto pattern = args[0].toString();
    auto flags = args.length > 1 ? args[1].toString() : "";
    try
    {
        obj.nativeObject = new ScriptRegExp(pattern, flags);
    }
    catch(std.regex.RegexException rex)
    {
        nfe = NativeFunctionError.RETURN_VALUE_IS_EXCEPTION;
        return ScriptAny(rex.msg);
    }
    return ScriptAny.UNDEFINED;
}

/// Getter for the flags property. Fails with WRONG_TYPE_OF_ARG when thisObj holds no ScriptRegExp.
private ScriptAny native_RegExp_p_flags(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.flags);
}

/// Getter and setter for the lastIndex property: without arguments the current value is
/// returned, otherwise args[0] becomes the new value.
private ScriptAny native_RegExp_p_lastIndex(Environment env, ScriptAny* thisObj,
                                            ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
        return ScriptAny(regExp.lastIndex);
    immutable index = args[0].toValue!size_t;
    return ScriptAny(regExp.lastIndex = index);
}

/// Getter for the source property.
private ScriptAny native_RegExp_p_source(Environment env, ScriptAny* thisObj,
                                         ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.source);
}

/// Script binding for ScriptRegExp.dotAll.
private ScriptAny native_RegExp_dotAll(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.dotAll());
}

/// Script binding for ScriptRegExp.global.
private ScriptAny native_RegExp_global(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.global());
}

/// Script binding for ScriptRegExp.ignoreCase.
private ScriptAny native_RegExp_ignoreCase(Environment env, ScriptAny* thisObj,
                                           ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.ignoreCase());
}

/// Script binding for ScriptRegExp.multiline.
private ScriptAny native_RegExp_multiline(Environment env, ScriptAny* thisObj,
                                          ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.multiline());
}

/// Script binding for ScriptRegExp.match; args[0] is converted to the subject string.
private ScriptAny native_RegExp_match(Environment env, ScriptAny* thisObj,
                                      ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.match(str));
}

// TODO matchAll once iterators are implemented

/// Script binding for ScriptRegExp.replace; args[0] is the subject string, args[1] the
/// replacement format.
private ScriptAny native_RegExp_replace(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 2)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    auto fmt = args[1].toString();
    return ScriptAny(regExp.replace(str, fmt));
}

/// Script binding for ScriptRegExp.search; args[0] is converted to the subject string.
private ScriptAny native_RegExp_search(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.search(str));
}

/// Script binding for ScriptRegExp.split; args[0] is converted to the subject string.
private ScriptAny native_RegExp_split(Environment env, ScriptAny* thisObj,
                                      ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.split(str));
}

/// Script binding for ScriptRegExp.exec; args[0] is converted to the subject string.
private ScriptAny native_RegExp_exec(Environment env, ScriptAny* thisObj,
                                     ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    // exec changes the scanning state of the expression, so it must run exactly once per call
    auto result = regExp.exec(str);
    return ScriptAny(result);
}

/// Script binding for ScriptRegExp.test; args[0] is converted to the subject string.
private ScriptAny native_RegExp_test(Environment env, ScriptAny* thisObj,
                                     ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.test(str));
}

unittest
{
    import std.stdio: writeln;
    auto testString = "foo bar foo bar foo";
    auto testRegexp = new ScriptRegExp("foo", "g");
    auto rg2 = new ScriptRegExp("bar");
    auto result = testRegexp.exec(testString);
    assert(result != null);
    // walk through every occurrence of "foo"
    size_t matchCount = 0;
    while(result.length > 0)
    {
        writeln(result);
        ++matchCount;
        result = testRegexp.exec(testString);
    }
    assert(matchCount == 3);
    writeln(rg2.search(testString));
    assert(rg2.search(testString) == 4);
}