1 /*
2 This module implements the Mildew RegExp class. See
3 https://pillager86.github.io/dmildew/RegExp.html
4 
5 ────────────────────────────────────────────────────────────────────────────────
6 
7 Copyright (C) 2021 pillager86.rf.gd
8 
9 This program is free software: you can redistribute it and/or modify it under 
10 the terms of the GNU General Public License as published by the Free Software 
11 Foundation, either version 3 of the License, or (at your option) any later 
12 version.
13 
14 This program is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
16 PARTICULAR PURPOSE.  See the GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License along with 
19 this program.  If not, see <https://www.gnu.org/licenses/>.
20 */
21 module mildew.stdlib.regexp;
22 
23 static import std.regex;
24 
25 import mildew.environment;
26 import mildew.interpreter;
27 import mildew.types;
28 
/**
 * Class that encapsulates regular expressions. The D struct Regex cannot be directly stored in a ScriptObject,
 * so this class wraps the compiled Regex together with its source pattern, its flags, and the position
 * state used by exec.
 */
class ScriptRegExp
{
public:
    /**
     * Compiles a regular expression.
     * Params:
     *  pattern = The regular expression source text.
     *  flags = Flag characters, passed unchanged to std.regex.regex.
     * Throws: Whatever std.regex.regex throws for an invalid pattern or flag
     *         (the script constructor in this module catches std.regex.RegexException).
     */
    this(in string pattern, in string flags="")
    {
        import std.algorithm.sorting : sort;
        import std.string : representation;

        _regex = std.regex.regex(pattern, flags);
        _source = pattern;

        // The flags are stored sorted by code unit so that the same set of flags always
        // yields the same flags string (e.g. "mgi" is reported as "gim").
        // char[] is not a random access range, so the sort runs on its ubyte[] representation.
        auto sortedFlags = flags.dup;
        sort(sortedFlags.representation);
        _flags = sortedFlags.idup;
    }

    /// flags property: the flag characters in sorted order
    string flags() const { return _flags; }

    /// last index property: offset in the current exec string where the next exec search starts
    size_t lastIndex() const { return _lastIndex; }
    /// last index property (setter), returns the newly stored value
    size_t lastIndex(size_t li)
    {
        return _lastIndex = li;
    }

    /// source property: the pattern text this object was constructed with
    string source() const { return _source; }

    /// whether or not 's' flag was used
    bool dotAll() const { return hasFlag('s'); }

    /// whether or not 'g' flag was used
    bool global() const { return hasFlag('g'); }

    /// whether or not 'i' flag was used
    bool ignoreCase() const { return hasFlag('i'); }

    /// whether or not 'm' flag was used
    bool multiline() const { return hasFlag('m'); }

    /**
     * Collects the full text of every match that std.regex.match produces for str.
     * Returns: An array of matched substrings, empty when nothing matches.
     */
    auto match(string str)
    {
        string[] result;
        foreach(mat ; std.regex.match(str, _regex))
            result ~= mat.hit;
        return result;
    }

    /// matchAll - returns the std.regex.matchAll range over str; the script wrapper turns it into an iterator
    auto matchAll(string str)
    {
        return std.regex.matchAll(str, _regex);
    }

    /// replace - replaces every match when the 'g' flag is set, otherwise only the first one
    auto replace(string str, string fmt)
    {
        if(global)
            return std.regex.replaceAll(str, _regex, fmt);
        else
            return std.regex.replaceFirst(str, _regex, fmt);
    }

    /// replace only the first occurrence.
    auto replaceFirst(string str, string fmt)
    {
        string r = std.regex.replaceFirst(str, _regex, fmt);
        return r;
    }

    /**
     * Finds the offset of the first match in str.
     * Returns: The index (in code units) where the first match starts, or -1 when there is no match.
     */
    long search(string str)
    {
        auto m = std.regex.matchFirst(str, _regex);
        // Test the match itself rather than comparing pre.length with str.length:
        // an empty match at the very end of the input is still a match.
        if(m.empty)
            return -1;
        return cast(long)m.pre.length;
    }

    /// split - splits str using the regular expression as the separator
    auto split(string str)
    {
        return std.regex.split(str, _regex);
    }

    /**
     * Stateful matching. Calling exec repeatedly with the same string walks through successive
     * matches; calling it with a different string restarts from the beginning of that string.
     * Params:
     *  str = The text to search.
     * Returns: The whole match followed by each capture group, or an empty array when there is no
     *          further match. After a failed search lastIndex is reset to 0 so that the same string
     *          can be searched again from the start.
     */
    string[] exec(string str)
    {
        if(str != _currentExec)
        {
            if(str.length < 1)
                return [];
            _currentExec = str;
            _lastIndex = 0;
        }
        else if(_lastIndex >= _currentExec.length)
        {
            _lastIndex = 0;
            return [];
        }

        immutable start = _lastIndex;
        auto mat = std.regex.matchFirst(str[start..$], _regex);
        if(mat.empty)
        {
            _lastIndex = 0;
            return [];
        }

        // The next search begins right after this match: skip the text before the match
        // and the match itself. Capture groups lie inside the match and must not be counted again.
        _lastIndex = start + mat.pre.length + mat.hit.length;

        string[] result;
        foreach(value ; mat)
            result ~= value;
        return result;
    }

    /// test - true when exec finds a match in str (shares and advances the same state as exec)
    bool test(string str)
    {
        return exec(str).length > 0;
    }

    /// get the string representation
    override string toString() const 
    {
        return "/" ~ _source ~ "/" ~ _flags;
    }

private:
    /// true when the given flag character is part of the flags this object was built with
    bool hasFlag(char flag) const
    {
        foreach(ch ; _flags)
            if(ch == flag) return true;
        return false;
    }

    string _currentExec; // the string exec is currently walking through; a different string restarts exec
    size_t _lastIndex; // offset into _currentExec where the next exec search begins

    string _source; // keep track of source
    string _flags; // keep track of flags
    std.regex.Regex!char _regex;
}
209 
/**
 * Registers the RegExp constructor as a global of the given interpreter. Loading this library is
 * optional because regex literals are a first class language feature. Documentation for this
 * library can be found at https://pillager86.github.io/dmildew/RegExp.html
 * Params:
 *  interpreter = The Interpreter instance to load the RegExp constructor into.
 */
void initializeRegExpLibrary(Interpreter interpreter)
{
    auto regExpCtor = ScriptAny(new ScriptFunction("RegExp", &native_RegExp_ctor, true));

    // link the constructor and the shared prototype to each other
    regExpCtor["prototype"] = getRegExpProto();
    regExpCtor["prototype"]["constructor"] = regExpCtor;

    interpreter.forceSetGlobal("RegExp", regExpCtor, false);
}
225 
/// Returns the RegExp prototype, building it on the first call. This is public because the VM needs it.
ScriptObject getRegExpProto()
{
    // already built by an earlier call
    if(_regExpProto !is null)
        return _regExpProto;

    auto proto = _regExpProto = new ScriptObject("RegExp", null);

    // accessor properties
    proto.addGetterProperty("flags",
            new ScriptFunction("RegExp.prototype.flags", &native_RegExp_p_flags));
    proto.addGetterProperty("lastIndex",
            new ScriptFunction("RegExp.prototype.lastIndex", &native_RegExp_p_lastIndex));
    proto.addSetterProperty("lastIndex",
            new ScriptFunction("RegExp.prototype.lastIndex", &native_RegExp_p_lastIndex));
    proto.addGetterProperty("source",
            new ScriptFunction("RegExp.prototype.source", &native_RegExp_p_source));

    // flag queries, exposed as methods
    proto["dotAll"] = new ScriptFunction("RegExp.prototype.dotAll", &native_RegExp_dotAll);
    proto["global"] = new ScriptFunction("RegExp.prototype.global", &native_RegExp_global);
    proto["ignoreCase"] = new ScriptFunction("RegExp.prototype.ignoreCase", &native_RegExp_ignoreCase);
    proto["multiline"] = new ScriptFunction("RegExp.prototype.multiline", &native_RegExp_multiline);

    // matching and text manipulation methods
    proto["match"] = new ScriptFunction("RegExp.prototype.match", &native_RegExp_match);
    proto["matchAll"] = new ScriptFunction("RegExp.prototype.matchAll", &native_RegExp_matchAll);
    proto["replace"] = new ScriptFunction("RegExp.prototype.replace", &native_RegExp_replace);
    proto["search"] = new ScriptFunction("RegExp.prototype.search", &native_RegExp_search);
    proto["split"] = new ScriptFunction("RegExp.prototype.split", &native_RegExp_split);
    proto["exec"] = new ScriptFunction("RegExp.prototype.exec", &native_RegExp_exec);
    proto["test"] = new ScriptFunction("RegExp.prototype.test", &native_RegExp_test);

    return proto;
}
255 
// Cached RegExp prototype object; starts out null and is created on the first call to getRegExpProto.
private ScriptObject _regExpProto;
257 
/// Script constructor: new RegExp(pattern[, flags]). Stores a ScriptRegExp as the native object of `this`.
/// An invalid pattern is reported to the script as an exception carrying the RegexException message.
private ScriptAny native_RegExp_ctor(Environment env, ScriptAny* thisObj, ScriptAny[] args, ref NativeFunctionError nfe)
{
    // nothing to construct into when `this` is not an object
    if(!thisObj.isObject)
        return ScriptAny.UNDEFINED;
    auto target = thisObj.toValue!ScriptObject;

    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }

    auto patternText = args[0].toString();
    string flagText = "";
    if(args.length > 1)
        flagText = args[1].toString();

    try 
    {
        target.nativeObject = new ScriptRegExp(patternText, flagText);
        return ScriptAny.UNDEFINED;
    }
    catch(std.regex.RegexException ex)
    {
        nfe = NativeFunctionError.RETURN_VALUE_IS_EXCEPTION;
        return ScriptAny(ex.msg);
    }
}
281 
/// Getter for RegExp.prototype.flags. Sets WRONG_TYPE_OF_ARG when `this` does not hold a ScriptRegExp.
private ScriptAny native_RegExp_p_flags(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.flags);
    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
293 
/// Combined getter/setter for RegExp.prototype.lastIndex: with no argument it reads the value,
/// with an argument it stores the argument first. Either way the resulting lastIndex is returned.
private ScriptAny native_RegExp_p_lastIndex(Environment env, ScriptAny* thisObj,
                                            ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    // setter path: store the requested index before reporting it back
    if(args.length >= 1)
    {
        immutable requested = args[0].toValue!size_t;
        self.lastIndex = requested;
    }
    return ScriptAny(self.lastIndex);
}
308 
/// Getter for RegExp.prototype.source. Sets WRONG_TYPE_OF_ARG when `this` does not hold a ScriptRegExp.
private ScriptAny native_RegExp_p_source(Environment env, ScriptAny* thisObj,
                                         ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.source);
    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
320 
/// RegExp.prototype.dotAll: reports whether the 's' flag is set on the receiver.
private ScriptAny native_RegExp_dotAll(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.dotAll());
    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
332 
/// RegExp.prototype.global: reports whether the 'g' flag is set on the receiver.
private ScriptAny native_RegExp_global(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.global());
    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
344 
/// RegExp.prototype.ignoreCase: reports whether the 'i' flag is set on the receiver.
private ScriptAny native_RegExp_ignoreCase(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.ignoreCase());
    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
356 
/// RegExp.prototype.multiline: reports whether the 'm' flag is set on the receiver.
private ScriptAny native_RegExp_multiline(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.multiline());
    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
368 
/// RegExp.prototype.match(str): returns the array of matched substrings produced by ScriptRegExp.match.
/// The receiver type is checked before the argument count.
private ScriptAny native_RegExp_match(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    else if(args.length < 1)
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
    else
        return ScriptAny(self.match(args[0].toString()));
    return ScriptAny.UNDEFINED;
}
387 
/// RegExp.prototype.matchAll(str): returns an "Iterator" object backed by a ScriptGenerator that
/// yields the text of each match found by ScriptRegExp.matchAll.
private ScriptAny native_RegExp_matchAll(Environment env, ScriptAny* thisObj,
                                         ScriptAny[] args, ref NativeFunctionError nfe)
{
    import std.concurrency: yield;
    import mildew.stdlib.generator: ScriptGenerator, getGeneratorPrototype;

    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto input = args[0].toString();

    // Generator body: captures the regular expression and the input text, and yields one hit per match.
    ScriptAny producer(Environment genEnv, ScriptAny* genThis, ScriptAny[] genArgs, ref NativeFunctionError genNfe)
    {
        foreach(found; self.matchAll(input))
            yield!ScriptAny(ScriptAny(found.hit));
        return ScriptAny.UNDEFINED;
    }

    auto generator = new ScriptGenerator(env, new ScriptFunction("iterator", &producer), []);
    return ScriptAny(new ScriptObject("Iterator", getGeneratorPrototype, generator));
}
417 
/// RegExp.prototype.replace(str, fmt): returns str with matches replaced according to fmt
/// (see ScriptRegExp.replace). Requires two arguments.
private ScriptAny native_RegExp_replace(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    else if(args.length < 2)
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
    else
    {
        auto input = args[0].toString();
        auto replacement = args[1].toString();
        return ScriptAny(self.replace(input, replacement));
    }
    return ScriptAny.UNDEFINED;
}
436 
/// RegExp.prototype.search(str): returns the result of ScriptRegExp.search for the given text.
private ScriptAny native_RegExp_search(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    else if(args.length < 1)
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
    else
        return ScriptAny(self.search(args[0].toString()));
    return ScriptAny.UNDEFINED;
}
454 
/// RegExp.prototype.split(str): returns the pieces of str separated by matches of the receiver.
private ScriptAny native_RegExp_split(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    else if(args.length < 1)
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
    else
        return ScriptAny(self.split(args[0].toString()));
    return ScriptAny.UNDEFINED;
}
472 
/// RegExp.prototype.exec(str): returns the next match (whole match followed by capture groups)
/// from ScriptRegExp.exec, or an empty array when there is none.
/// Sets WRONG_TYPE_OF_ARG when `this` does not hold a ScriptRegExp and WRONG_NUMBER_OF_ARGS
/// when no string is supplied.
private ScriptAny native_RegExp_exec(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    // exec is stateful (it advances lastIndex), so it must be called exactly once per script call;
    // calling it a second time would silently skip a match.
    return ScriptAny(regExp.exec(str));
}
491 
/// RegExp.prototype.test(str): returns the boolean result of ScriptRegExp.test for the given text.
private ScriptAny native_RegExp_test(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    else if(args.length < 1)
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
    else
        return ScriptAny(self.test(args[0].toString()));
    return ScriptAny.UNDEFINED;
}
509 
// Checks repeated exec calls over one string and the offset reported by search.
unittest
{
    auto testString = "foo bar foo bar foo";
    auto testRegexp = new ScriptRegExp("foo", "g");
    auto rg2 = new ScriptRegExp("bar");

    // exec must hand out every "foo" in turn and then report exhaustion with an empty array
    size_t matchCount = 0;
    auto result = testRegexp.exec(testString);
    assert(result != null);
    while(result)
    {
        assert(result == ["foo"]);
        ++matchCount;
        result = testRegexp.exec(testString);
    }
    assert(matchCount == 3);

    // the first "bar" starts at offset 4
    assert(rg2.search(testString) == 4);
}