/*
This module implements the Mildew RegExp class. See
https://pillager86.github.io/dmildew/RegExp.html

────────────────────────────────────────────────────────────────────────────────

Copyright (C) 2021 pillager86.rf.gd

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <https://www.gnu.org/licenses/>.
*/
module mildew.stdlib.regexp;

static import std.regex;

import mildew.environment;
import mildew.interpreter;
import mildew.types;

/**
 * Class that encapsulates regular expressions. The D struct Regex cannot be directly stored in a ScriptObject,
 * so this class wraps it together with the source pattern, the flags, and the scanning state used by exec.
 */
class ScriptRegExp
{
public:
    /**
     * Compiles pattern with the given flags and records both for later inspection.
     * Params:
     *  pattern = The regular expression source, passed unchanged to std.regex.regex.
     *  flags = Flag characters passed unchanged to std.regex.regex. They are stored sorted in
     *          ascending character order, so "mig" is reported as "gim".
     */
    this(in string pattern, in string flags="")
    {
        import std.algorithm.sorting : sort;
        import std.exception : assumeUnique;
        import std.string : representation;

        _regex = std.regex.regex(pattern, flags);
        _source = pattern;

        // char[] is not a random-access range because of auto-decoding, so sort the raw code units in place.
        char[] sortedFlags = flags.dup;
        sort(sortedFlags.representation);
        _flags = assumeUnique(sortedFlags);
    }

    /// flags property (the constructor's flags, sorted)
    string flags() const { return _flags; }

    /// last index property: offset into the current exec subject where the next exec call starts
    size_t lastIndex() const { return _lastIndex; }
    /// last index property
    size_t lastIndex(size_t li)
    {
        return _lastIndex = li;
    }

    /// source property (the pattern exactly as given to the constructor)
    string source() const { return _source; }

    /// whether or not 's' flag was used
    bool dotAll() const { return hasFlag('s'); }

    /// whether or not 'g' flag was used
    bool global() const { return hasFlag('g'); }

    /// whether or not 'i' flag was used
    bool ignoreCase() const { return hasFlag('i'); }

    /// whether or not 'm' flag was used
    bool multiline() const { return hasFlag('m'); }

    /// returns the matched text of every match that std.regex.match yields for str
    auto match(string str)
    {
        auto m = std.regex.match(str, _regex);
        string[] result;
        foreach(mat ; m)
            result ~= mat.hit;
        return result;
    }

    /// matchAll - returns the lazy std.regex.matchAll range; the native wrapper turns it into a generator
    auto matchAll(string str)
    {
        auto m = std.regex.matchAll(str, _regex);
        return m;
    }

    /// replace - every occurrence when the 'g' flag was given, otherwise only the first one
    auto replace(string str, string fmt)
    {
        if(global)
            return std.regex.replaceAll(str, _regex, fmt);
        else
            return std.regex.replaceFirst(str, _regex, fmt);
    }

    /// replace only the first occurrence.
    auto replaceFirst(string str, string fmt)
    {
        string r = std.regex.replaceFirst(str, _regex, fmt);
        return r;
    }

    /**
     * search - returns the length of the text that precedes the first match.
     * NOTE(review): when nothing matches, std.regex appears to report the whole input as the
     * prefix, which would make this return str.length rather than -1 - confirm before relying on it.
     */
    auto search(string str)
    {
        auto m = std.regex.match(str, _regex);
        return m.pre.length;
    }

    /// split - splits str using this regular expression as the separator
    auto split(string str)
    {
        auto result = std.regex.split(str, _regex);
        return result;
    }

    /**
     * exec - finds the next match in str, resuming at lastIndex when str equals the string
     * given to the previous call and restarting at 0 when a different string is given.
     * Params:
     *  str = The subject string.
     * Returns:
     *  The whole match followed by each capture group, or an empty array when there is no
     *  further match. On success lastIndex is moved to the end of the match; on failure it
     *  is reset to 0 so that the same string can be scanned again.
     */
    string[] exec(string str)
    {
        import std.utf : stride;

        if(str == _currentExec)
        {
            if(_lastIndex >= _currentExec.length)
            {
                // Scan exhausted: rewind so the regex is not stuck at the end of this subject forever.
                _lastIndex = 0;
                return [];
            }
        }
        else
        {
            if(str.length < 1)
                return [];
            _currentExec = str;
            _lastIndex = 0;
        }

        auto mat = std.regex.matchFirst(str[_lastIndex..$], _regex);
        if(mat.empty)
        {
            _lastIndex = 0;
            return [];
        }

        // mat was produced from the slice str[_lastIndex..$], so the end of the match in str is
        // the old index plus the skipped prefix plus the hit. Capture groups lie inside the hit
        // and must not be counted again.
        immutable matchStart = _lastIndex + mat.pre.length;
        size_t next = matchStart + mat.hit.length;
        if(mat.hit.length == 0)
        {
            // A zero-length match would be found again at the same position on every call;
            // step over one code point so that callers looping on exec always terminate.
            next += matchStart < str.length ? stride(str, matchStart) : 1;
        }
        _lastIndex = next;

        string[] result;
        foreach(value ; mat)
            result ~= value;
        return result;
    }

    /// test - true if exec finds a match; shares exec's scanning state and advances lastIndex
    bool test(string str)
    {
        return exec(str).length > 0;
    }

    /// get the string representation
    override string toString() const
    {
        return "/" ~ _source ~ "/" ~ _flags;
    }

private:
    /// whether the given flag character is among the stored flags
    bool hasFlag(in char flag) const
    {
        foreach(ch ; _flags)
            if(ch == flag) return true;
        return false;
    }

    string _currentExec; // subject of the exec scan in progress; a different subject restarts the scan
    size_t _lastIndex;   // offset into _currentExec where the next exec begins

    string _source; // keep track of source
    string _flags; // keep track of flags
    std.regex.Regex!char _regex;
}

/**
 * Initializes the RegExp constructor. This is not necessary as regex literals are a first class
 * language feature. Documentation for this library can be found at
 * https://pillager86.github.io/dmildew/RegExp.html
 * Params:
 *  interpreter = The Interpreter instance that receives the global "RegExp" constructor.
 */
void initializeRegExpLibrary(Interpreter interpreter)
{
    ScriptAny ctor = new ScriptFunction("RegExp", &native_RegExp_ctor, true);
    ctor["prototype"] = getRegExpProto();
    ctor["prototype"]["constructor"] = ctor;

    interpreter.forceSetGlobal("RegExp", ctor, false);
}

/// Get the RegExp prototype. This is public because the VM needs it. The prototype is built on first use.
ScriptObject getRegExpProto()
{
    if(_regExpProto is null)
    {
        _regExpProto = new ScriptObject("RegExp", null);

        _regExpProto.addGetterProperty("flags", new ScriptFunction("RegExp.prototype.flags",
                &native_RegExp_p_flags));
        // lastIndex uses one native function for both roles: no argument reads, one argument writes
        _regExpProto.addGetterProperty("lastIndex", new ScriptFunction("RegExp.prototype.lastIndex",
                &native_RegExp_p_lastIndex));
        _regExpProto.addSetterProperty("lastIndex", new ScriptFunction("RegExp.prototype.lastIndex",
                &native_RegExp_p_lastIndex));
        _regExpProto.addGetterProperty("source", new ScriptFunction("RegExp.prototype.source",
                &native_RegExp_p_source));

        _regExpProto["dotAll"] = new ScriptFunction("RegExp.prototype.dotAll", &native_RegExp_dotAll);
        _regExpProto["global"] = new ScriptFunction("RegExp.prototype.global", &native_RegExp_global);
        _regExpProto["ignoreCase"] = new ScriptFunction("RegExp.prototype.ignoreCase", &native_RegExp_ignoreCase);
        _regExpProto["multiline"] = new ScriptFunction("RegExp.prototype.multiline", &native_RegExp_multiline);
        _regExpProto["match"] = new ScriptFunction("RegExp.prototype.match", &native_RegExp_match);
        _regExpProto["matchAll"] = new ScriptFunction("RegExp.prototype.matchAll", &native_RegExp_matchAll);
        _regExpProto["replace"] = new ScriptFunction("RegExp.prototype.replace", &native_RegExp_replace);
        _regExpProto["search"] = new ScriptFunction("RegExp.prototype.search", &native_RegExp_search);
        _regExpProto["split"] = new ScriptFunction("RegExp.prototype.split", &native_RegExp_split);
        _regExpProto["exec"] = new ScriptFunction("RegExp.prototype.exec", &native_RegExp_exec);
        _regExpProto["test"] = new ScriptFunction("RegExp.prototype.test", &native_RegExp_test);
    }
    return _regExpProto;
}

private ScriptObject _regExpProto;

/// Fetches the ScriptRegExp held by thisObj. Sets nfe to WRONG_TYPE_OF_ARG and returns null when there is none.
private ScriptRegExp regExpFromThis(ScriptAny* thisObj, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return regExp;
}

/// RegExp(pattern, flags=""): stores a new ScriptRegExp as the native object of the constructed object.
private ScriptAny native_RegExp_ctor(Environment env, ScriptAny* thisObj, ScriptAny[] args, ref NativeFunctionError nfe)
{
    if(!thisObj.isObject)
        return ScriptAny.UNDEFINED;
    auto obj = thisObj.toValue!ScriptObject;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto pattern = args[0].toString();
    auto flags = args.length > 1 ? args[1].toString() : "";
    try
    {
        obj.nativeObject = new ScriptRegExp(pattern, flags);
    }
    catch(std.regex.RegexException rex)
    {
        // a pattern that fails to compile is reported to the script with the RegexException message
        nfe = NativeFunctionError.RETURN_VALUE_IS_EXCEPTION;
        return ScriptAny(rex.msg);
    }
    return ScriptAny.UNDEFINED;
}

/// getter for RegExp.prototype.flags
private ScriptAny native_RegExp_p_flags(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    return ScriptAny(regExp.flags);
}

/// getter (no arguments) and setter (one argument) for RegExp.prototype.lastIndex
private ScriptAny native_RegExp_p_lastIndex(Environment env, ScriptAny* thisObj,
                                            ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    if(args.length < 1)
        return ScriptAny(regExp.lastIndex);
    immutable index = args[0].toValue!size_t;
    return ScriptAny(regExp.lastIndex = index);
}

/// getter for RegExp.prototype.source
private ScriptAny native_RegExp_p_source(Environment env, ScriptAny* thisObj,
                                         ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    return ScriptAny(regExp.source);
}

/// RegExp.prototype.dotAll(): whether the 's' flag was used
private ScriptAny native_RegExp_dotAll(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    return ScriptAny(regExp.dotAll());
}

/// RegExp.prototype.global(): whether the 'g' flag was used
private ScriptAny native_RegExp_global(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    return ScriptAny(regExp.global());
}

/// RegExp.prototype.ignoreCase(): whether the 'i' flag was used
private ScriptAny native_RegExp_ignoreCase(Environment env, ScriptAny* thisObj,
                                           ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    return ScriptAny(regExp.ignoreCase());
}

/// RegExp.prototype.multiline(): whether the 'm' flag was used
private ScriptAny native_RegExp_multiline(Environment env, ScriptAny* thisObj,
                                          ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    return ScriptAny(regExp.multiline());
}

/// RegExp.prototype.match(str): array of matched substrings
private ScriptAny native_RegExp_match(Environment env, ScriptAny* thisObj,
                                      ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    auto result = regExp.match(str); // @suppress(dscanner.suspicious.unmodified)
    return ScriptAny(result);
}

/// RegExp.prototype.matchAll(str): an Iterator object backed by a generator that yields each match's text
private ScriptAny native_RegExp_matchAll(Environment env, ScriptAny* thisObj,
                                         ScriptAny[] args, ref NativeFunctionError nfe)
{
    import std.concurrency: yield;
    import mildew.stdlib.generator: ScriptGenerator, getGeneratorPrototype;

    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    // Generator body: closes over regExp and str and yields the hit of each match in turn.
    ScriptAny func(Environment env, ScriptAny* thisObj, ScriptAny[] args, ref NativeFunctionError nfe)
    {
        auto matches = regExp.matchAll(str);
        foreach(match; matches)
            yield!ScriptAny(ScriptAny(match.hit));
        return ScriptAny.UNDEFINED;
    }
    auto generator = new ScriptGenerator(env, new ScriptFunction("iterator", &func), []);
    auto result = new ScriptObject("Iterator", getGeneratorPrototype, generator);
    return ScriptAny(result);
}

/// RegExp.prototype.replace(str, fmt)
private ScriptAny native_RegExp_replace(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    if(args.length < 2)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    auto fmt = args[1].toString();
    return ScriptAny(regExp.replace(str, fmt));
}

/// RegExp.prototype.search(str)
private ScriptAny native_RegExp_search(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.search(str));
}

/// RegExp.prototype.split(str)
private ScriptAny native_RegExp_split(Environment env, ScriptAny* thisObj,
                                      ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.split(str));
}

/// RegExp.prototype.exec(str)
private ScriptAny native_RegExp_exec(Environment env, ScriptAny* thisObj,
                                     ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    // exec advances lastIndex, so it must run exactly once per script call; a second call
    // here would silently drop every other match.
    auto result = regExp.exec(str); // @suppress(dscanner.suspicious.unmodified)
    return ScriptAny(result);
}

/// RegExp.prototype.test(str)
private ScriptAny native_RegExp_test(Environment env, ScriptAny* thisObj,
                                     ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = regExpFromThis(thisObj, nfe);
    if(regExp is null)
        return ScriptAny.UNDEFINED;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.test(str));
}

unittest
{
    auto testString = "foo bar foo bar foo";
    auto testRegexp = new ScriptRegExp("foo", "g");
    auto rg2 = new ScriptRegExp("bar");

    // exec walks through every occurrence and then reports exhaustion
    size_t count = 0;
    auto result = testRegexp.exec(testString);
    assert(result.length > 0);
    while(result.length > 0)
    {
        assert(result == ["foo"]);
        ++count;
        result = testRegexp.exec(testString);
    }
    assert(count == 3);
    // after exhaustion the scan restarts from the beginning
    assert(testRegexp.lastIndex == 0);
    assert(testRegexp.exec(testString) == ["foo"]);
    assert(testRegexp.lastIndex == 3);

    assert(rg2.search(testString) == 4);

    // adjacent matches are not skipped
    auto adjacent = new ScriptRegExp("foo", "g");
    assert(adjacent.exec("foofoofoo") == ["foo"]);
    assert(adjacent.lastIndex == 3);
    assert(adjacent.exec("foofoofoo") == ["foo"]);
    assert(adjacent.lastIndex == 6);
    assert(adjacent.exec("foofoofoo") == ["foo"]);
    assert(adjacent.lastIndex == 9);
    assert(adjacent.exec("foofoofoo").length == 0);

    // capture groups are returned but do not move lastIndex past the end of the match
    auto grouped = new ScriptRegExp("(b)(a)r", "g");
    assert(grouped.exec("xbar baz") == ["bar", "b", "a"]);
    assert(grouped.lastIndex == 4);

    // flags are stored sorted
    auto flagged = new ScriptRegExp("a", "mig");
    assert(flagged.flags == "gim");
    assert(flagged.global && flagged.ignoreCase && flagged.multiline && !flagged.dotAll);
    assert(flagged.toString() == "/a/gim");
}