/*
This module implements the Mildew RegExp class. See
https://pillager86.github.io/dmildew/RegExp.html

────────────────────────────────────────────────────────────────────────────────

Copyright (C) 2021 pillager86.rf.gd

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>.
*/
module mildew.stdlib.regexp;

static import std.regex;

import mildew.environment;
import mildew.interpreter;
import mildew.types;

/**
 * Class that encapsulates regular expressions. The D struct Regex cannot be directly stored in a ScriptObject
 */
class ScriptRegExp
{
public:
    /**
     * Compiles the pattern with std.regex and records its source and flags.
     * Params:
     *  pattern = The regular expression source text.
     *  flags = Flag characters passed to std.regex.regex. They are stored sorted so that
     *          the flags property reports them in a stable order.
     */
    this(in string pattern, in string flags="")
    {
        import std.algorithm.sorting : sort;
        import std.string : representation;

        // compile first so that an invalid pattern or flag throws before any state is stored
        _regex = std.regex.regex(pattern, flags);

        _source = pattern;

        // sort the code units directly; char[] itself is not a random access range
        char[] sortedFlags = flags.dup;
        sort(sortedFlags.representation);
        _flags = sortedFlags.idup;
    }

    /// flags property (sorted flag characters)
    string flags() const { return _flags; }

    /// last index property: offset into the current exec string where the next exec call starts
    size_t lastIndex() const { return _lastIndex; }
    /// last index property
    size_t lastIndex(size_t li)
    {
        return _lastIndex = li;
    }

    /// source property
    string source() const { return _source; }

    /// whether or not 's' flag was used
    bool dotAll() const { return hasFlag('s'); }

    /// whether or not 'g' flag was used
    bool global() const { return hasFlag('g'); }

    /// whether or not 'i' flag was used
    bool ignoreCase() const { return hasFlag('i'); }

    /// whether or not 'm' flag was used
    bool multiline() const { return hasFlag('m'); }

    /// returns the text of every match produced by std.regex.match
    string[] match(string str)
    {
        string[] result;
        foreach(mat ; std.regex.match(str, _regex))
            result ~= mat.hit;
        return result;
    }

    /// matchAll - returns the lazy std.regex match range; native_RegExp_matchAll wraps it in a generator
    auto matchAll(string str)
    {
        return std.regex.matchAll(str, _regex);
    }

    /// replace all occurrences when the 'g' flag was used, otherwise only the first one
    string replace(string str, string fmt)
    {
        if(global)
            return std.regex.replaceAll(str, _regex, fmt);
        return std.regex.replaceFirst(str, _regex, fmt);
    }

    /// replace only the first occurrence.
    string replaceFirst(string str, string fmt)
    {
        return std.regex.replaceFirst(str, _regex, fmt);
    }

    /**
     * Finds the first match in str.
     * Returns: The offset of the first match, or -1 when there is no match.
     */
    long search(string str)
    {
        auto m = std.regex.matchFirst(str, _regex);
        // test for emptiness instead of comparing the prefix length against the input length,
        // which would misreport a zero-length match at the end of the input as "not found"
        if(m.empty)
            return -1;
        return cast(long)m.pre.length;
    }

    /// split
    auto split(string str)
    {
        return std.regex.split(str, _regex);
    }

    /**
     * Finds the next match in str, continuing from lastIndex when str is the same string
     * that was given to the previous call.
     * Params:
     *  str = The text to search. A different string than last time restarts at offset 0.
     * Returns: The whole match followed by its capture groups, or an empty array when there is
     *          no further match.
     */
    string[] exec(string str)
    {
        if(str == _currentExec)
        {
            if(_lastIndex >= _currentExec.length)
                return [];
        }
        else
        {
            if(str.length < 1)
                return [];
            _currentExec = str;
            _lastIndex = 0;
        }

        immutable start = _lastIndex;
        auto mat = std.regex.matchFirst(str[start..$], _regex);
        if(mat.empty)
            return [];

        // the match was made against a slice, so the new position is the slice offset plus
        // the skipped prefix plus the matched text
        _lastIndex = start + mat.pre.length + mat.hit.length;

        string[] result;
        foreach(value ; mat)
            result ~= value;
        return result;
    }

    /// test: true when exec finds a match. Shares and advances the same state as exec.
    bool test(string str)
    {
        return exec(str).length > 0;
    }

    /// get the string representation
    override string toString() const
    {
        return "/" ~ _source ~ "/" ~ _flags;
    }

private:
    /// whether the given flag character is among the stored flags
    bool hasFlag(char flag) const
    {
        foreach(ch ; _flags)
            if(ch == flag) return true;
        return false;
    }

    string _currentExec; // the string exec is currently stepping through
    size_t _lastIndex;

    string _source; // keep track of source
    string _flags; // keep track of flags
    std.regex.Regex!char _regex;
}

/**
 * Initializes the RegExp constructor. This is not necessary as regex literals are a first class
 * language feature. Documentation for this library can be found at
 * https://pillager86.github.io/dmildew/RegExp.html
 * Params:
 *  interpreter = The Interpreter instance to load the RegExp constructor into.
 */
void initializeRegExpLibrary(Interpreter interpreter)
{
    ScriptAny ctor = new ScriptFunction("RegExp", &native_RegExp_ctor, true);
    ctor["prototype"] = getRegExpProto();
    ctor["prototype"]["constructor"] = ctor;

    interpreter.forceSetGlobal("RegExp", ctor, false);
}

/// Get the RegExp prototype. This is public because the VM needs it.
ScriptObject getRegExpProto()
{
    if(_regExpProto is null)
    {
        _regExpProto = new ScriptObject("RegExp", null);

        _regExpProto.addGetterProperty("flags", new ScriptFunction("RegExp.prototype.flags", &native_RegExp_p_flags));
        _regExpProto.addGetterProperty("lastIndex", new ScriptFunction("RegExp.prototype.lastIndex",
                &native_RegExp_p_lastIndex));
        _regExpProto.addSetterProperty("lastIndex", new ScriptFunction("RegExp.prototype.lastIndex",
                &native_RegExp_p_lastIndex));
        _regExpProto.addGetterProperty("source", new ScriptFunction("RegExp.prototype.source",
                &native_RegExp_p_source));

        _regExpProto["dotAll"] = new ScriptFunction("RegExp.prototype.dotAll", &native_RegExp_dotAll);
        _regExpProto["global"] = new ScriptFunction("RegExp.prototype.global", &native_RegExp_global);
        _regExpProto["ignoreCase"] = new ScriptFunction("RegExp.prototype.ignoreCase", &native_RegExp_ignoreCase);
        _regExpProto["multiline"] = new ScriptFunction("RegExp.prototype.multiline", &native_RegExp_multiline);
        _regExpProto["match"] = new ScriptFunction("RegExp.prototype.match", &native_RegExp_match);
        _regExpProto["matchAll"] = new ScriptFunction("RegExp.prototype.matchAll", &native_RegExp_matchAll);
        _regExpProto["replace"] = new ScriptFunction("RegExp.prototype.replace", &native_RegExp_replace);
        _regExpProto["search"] = new ScriptFunction("RegExp.prototype.search", &native_RegExp_search);
        _regExpProto["split"] = new ScriptFunction("RegExp.prototype.split", &native_RegExp_split);
        _regExpProto["exec"] = new ScriptFunction("RegExp.prototype.exec", &native_RegExp_exec);
        _regExpProto["test"] = new ScriptFunction("RegExp.prototype.test", &native_RegExp_test);
    }
    return _regExpProto;
}

private ScriptObject _regExpProto;

/// RegExp(pattern, flags=""): stores a new ScriptRegExp as the native object of thisObj
private ScriptAny native_RegExp_ctor(Environment env, ScriptAny* thisObj, ScriptAny[] args, ref NativeFunctionError nfe)
{
    if(!thisObj.isObject)
        return ScriptAny.UNDEFINED;
    auto obj = thisObj.toValue!ScriptObject;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto pattern = args[0].toString();
    auto flags = args.length > 1 ? args[1].toString() : "";
    try
    {
        obj.nativeObject = new ScriptRegExp(pattern, flags);
    }
    catch(std.regex.RegexException rex)
    {
        // hand the compile error message back to the script as the exception value
        nfe = NativeFunctionError.RETURN_VALUE_IS_EXCEPTION;
        return ScriptAny(rex.msg);
    }
    return ScriptAny.UNDEFINED;
}

/// getter for RegExp.prototype.flags
private ScriptAny native_RegExp_p_flags(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.flags);
}

/// getter (no arguments) and setter (one argument) for RegExp.prototype.lastIndex
private ScriptAny native_RegExp_p_lastIndex(Environment env, ScriptAny* thisObj,
                                            ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
        return ScriptAny(regExp.lastIndex);
    immutable index = args[0].toValue!size_t;
    return ScriptAny(regExp.lastIndex = index);
}

/// getter for RegExp.prototype.source
private ScriptAny native_RegExp_p_source(Environment env, ScriptAny* thisObj,
                                         ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.source);
}

/// RegExp.prototype.dotAll()
private ScriptAny native_RegExp_dotAll(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.dotAll());
}

/// RegExp.prototype.global()
private ScriptAny native_RegExp_global(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.global());
}

/// RegExp.prototype.ignoreCase()
private ScriptAny native_RegExp_ignoreCase(Environment env, ScriptAny* thisObj,
                                           ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.ignoreCase());
}

/// RegExp.prototype.multiline()
private ScriptAny native_RegExp_multiline(Environment env, ScriptAny* thisObj,
                                          ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    return ScriptAny(regExp.multiline());
}

/// RegExp.prototype.match(str)
private ScriptAny native_RegExp_match(Environment env, ScriptAny* thisObj,
                                      ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    auto result = regExp.match(str); // @suppress(dscanner.suspicious.unmodified)
    return ScriptAny(result);
}

/// RegExp.prototype.matchAll(str): returns an Iterator object that yields each match's text
private ScriptAny native_RegExp_matchAll(Environment env, ScriptAny* thisObj,
                                         ScriptAny[] args, ref NativeFunctionError nfe)
{
    import std.concurrency: yield;
    import mildew.stdlib.generator: ScriptGenerator, getGeneratorPrototype;

    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    // generator body: captures regExp and str from the enclosing call
    ScriptAny func(Environment env, ScriptAny* thisObj, ScriptAny[] args, ref NativeFunctionError nfe)
    {
        auto matches = regExp.matchAll(str);
        foreach(match; matches)
            yield!ScriptAny(ScriptAny(match.hit));
        return ScriptAny.UNDEFINED;
    }
    auto generator = new ScriptGenerator(env, new ScriptFunction("iterator", &func), []);
    auto result = new ScriptObject("Iterator", getGeneratorPrototype, generator);
    return ScriptAny(result);
}

/// RegExp.prototype.replace(str, fmt)
private ScriptAny native_RegExp_replace(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 2)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    auto fmt = args[1].toString();
    return ScriptAny(regExp.replace(str, fmt));
}

/// RegExp.prototype.search(str)
private ScriptAny native_RegExp_search(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.search(str));
}

/// RegExp.prototype.split(str)
private ScriptAny native_RegExp_split(Environment env, ScriptAny* thisObj,
                                      ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.split(str));
}

/// RegExp.prototype.exec(str)
private ScriptAny native_RegExp_exec(Environment env, ScriptAny* thisObj,
                                     ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    // exec advances lastIndex, so it must be called exactly once per script call
    auto result = regExp.exec(str); // @suppress(dscanner.suspicious.unmodified)
    return ScriptAny(result);
}

/// RegExp.prototype.test(str)
private ScriptAny native_RegExp_test(Environment env, ScriptAny* thisObj,
                                     ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    return ScriptAny(regExp.test(str));
}

unittest
{
    import std.stdio: writeln;
    auto testString = "foo bar foo bar foo";
    auto testRegexp = new ScriptRegExp("foo", "g");
    auto rg2 = new ScriptRegExp("bar");
    auto result = testRegexp.exec(testString);
    assert(result != null);
    size_t count = 0;
    while(result.length > 0)
    {
        writeln(result);
        ++count;
        result = testRegexp.exec(testString);
    }
    assert(count == 3);
    writeln(rg2.search(testString));
    assert(rg2.search(testString) == 4);
    assert(rg2.search("foo") == -1);

    // adjacent matches must not be skipped and lastIndex must be the end of the match
    auto adjacent = new ScriptRegExp("foo", "g");
    assert(adjacent.exec("foofoo") == ["foo"]);
    assert(adjacent.lastIndex == 3);
    assert(adjacent.exec("foofoo") == ["foo"]);
    assert(adjacent.lastIndex == 6);
    assert(adjacent.exec("foofoo").length == 0);

    // capture groups are returned but do not move lastIndex past the match
    auto grouped = new ScriptRegExp("(b)(a)r");
    assert(grouped.exec("foo bar foo") == ["bar", "b", "a"]);
    assert(grouped.lastIndex == 7);

    // flags are reported in sorted order
    auto flagged = new ScriptRegExp("a", "mig");
    assert(flagged.flags == "gim");
    assert(flagged.global && flagged.ignoreCase && flagged.multiline && !flagged.dotAll);
    assert(flagged.toString() == "/a/gim");
}