1 /*
2 This module implements the Mildew RegExp class.
3 
4 ────────────────────────────────────────────────────────────────────────────────
5 
6 Copyright (C) 2021 pillager86.rf.gd
7 
8 This program is free software: you can redistribute it and/or modify it under 
9 the terms of the GNU General Public License as published by the Free Software 
10 Foundation, either version 3 of the License, or (at your option) any later 
11 version.
12 
13 This program is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
15 PARTICULAR PURPOSE.  See the GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License along with 
18 this program.  If not, see <https://www.gnu.org/licenses/>.
19 */
20 module mildew.stdlib.regexp;
21 
22 static import std.regex;
23 
24 import mildew.environment;
25 import mildew.interpreter;
26 import mildew.types;
27 
/**
 * Class that encapsulates regular expressions. The D struct Regex cannot be directly stored in a
 * ScriptObject, so this wrapper owns the compiled pattern together with its source text, its
 * flags, and the position bookkeeping used by exec and test.
 */
class ScriptRegExp
{
public:
    /**
     * Compiles a pattern.
     * Params:
     *  pattern = The regular expression source, passed unchanged to std.regex.regex.
     *  flags = The flag characters, passed unchanged to std.regex.regex. A sorted copy is kept
     *          for the flags property and for toString.
     * Throws: Whatever std.regex.regex throws for input it rejects.
     */
    this(in string pattern, in string flags="")
    {
        import std.algorithm.sorting : sort;
        import std.string : representation;

        _regex = std.regex.regex(pattern, flags);
        _source = pattern;

        // Sort the raw code units: char[] itself is not a random-access range for sort.
        char[] sortedFlags = flags.dup;
        sort(sortedFlags.representation);
        _flags = sortedFlags.idup;
    }

    /// flags property: the flag characters given to the constructor, in sorted order
    string flags() const { return _flags; }

    /// last index property: offset at which the next exec on the same string starts searching
    size_t lastIndex() const { return _lastIndex; }
    /// last index property (setter); returns the value that was stored
    size_t lastIndex(size_t li)
    {
        return _lastIndex = li;
    }

    /// source property: the pattern text given to the constructor
    string source() const { return _source; }

    /// whether or not 's' flag was used
    bool dotAll() const { return hasFlag('s'); }

    /// whether or not 'g' flag was used
    bool global() const { return hasFlag('g'); }

    /// whether or not 'i' flag was used
    bool ignoreCase() const { return hasFlag('i'); }

    /// whether or not 'm' flag was used
    bool multiline() const { return hasFlag('m'); }

    /// returns the matched text of every match that std.regex.match yields for str
    auto match(string str)
    {
        string[] hits;
        foreach(found ; std.regex.match(str, _regex))
            hits ~= found.hit;
        return hits;
    }

    /// matchAll - The Script will implement this as an iterator once generators are a thing
    auto matchAll(string str)
    {
        return std.regex.matchAll(str, _regex);
    }

    /// replace: every occurrence when the 'g' flag is set, otherwise only the first one
    auto replace(string str, string fmt)
    {
        if(global)
            return std.regex.replaceAll(str, _regex, fmt);
        else
            return std.regex.replaceFirst(str, _regex, fmt);
    }

    /// replace only the first occurrence.
    auto replaceFirst(string str, string fmt)
    {
        string r = std.regex.replaceFirst(str, _regex, fmt);
        return r;
    }

    /**
     * search
     * Returns: The offset of the first match in str, or -1 when the pattern does not match.
     */
    ptrdiff_t search(string str)
    {
        auto found = std.regex.matchFirst(str, _regex);
        // Without this check a failed search would report the length of the whole input.
        if(found.empty)
            return -1;
        return cast(ptrdiff_t) found.pre.length;
    }

    /// split str around every match of the pattern
    auto split(string str)
    {
        return std.regex.split(str, _regex);
    }

    /**
     * exec
     *
     * Calling exec repeatedly with the same string walks through its matches: each call searches
     * from lastIndex and, on success, moves lastIndex to just past the match. Passing a different
     * string restarts from offset 0.
     * Params:
     *  str = The text to search.
     * Returns: The whole match followed by its capture groups, or an empty array when nothing
     *          (more) matches or when str is empty.
     */
    string[] exec(string str)
    {
        size_t offset = 0;
        if(str == _currentExec)
        {
            if(_lastIndex >= _currentExec.length)
                return [];
            offset = _lastIndex;
        }
        else
        {
            if(str.length < 1)
                return [];
            _currentExec = str;
            _lastIndex = 0;
        }

        auto mat = std.regex.matchFirst(str[offset..$], _regex);
        if(mat.empty)
            return [];

        // The match was found inside the slice starting at offset, so the position just past it
        // is the slice start, plus the unmatched prefix, plus the length of the match itself.
        // Capture groups are part of the hit and must not be counted again.
        _lastIndex = offset + mat.pre.length + mat.hit.length;

        string[] result;
        foreach(value ; mat)
            result ~= value;
        return result;
    }

    /// test: true when exec finds a match; advances lastIndex exactly as exec does
    bool test(string str)
    {
        return exec(str).length > 0;
    }

    /// get the string representation
    override string toString() const 
    {
        return "/" ~ _source ~ "/" ~ _flags;
    }

private:
    /// true when the given flag character is among the stored flags
    bool hasFlag(char flag) const
    {
        foreach(ch ; _flags)
            if(ch == flag) return true;
        return false;
    }

    string _currentExec; // the string most recently given to exec; a different one restarts the scan
    size_t _lastIndex; // offset into _currentExec where the next exec begins

    string _source; // keep track of source
    string _flags; // keep track of flags
    std.regex.Regex!char _regex;
}
206 
/**
 * Initializes the RegExp namespace. Not necessary if regex literals are used.
 *
 * Builds the "RegExp" constructor function, links it with the shared RegExp prototype in both
 * directions, and registers it on the interpreter under the global name "RegExp".
 * Params:
 *  interpreter = The interpreter that receives the global.
 */
void initializeRegExpLibrary(Interpreter interpreter)
{
    ScriptAny regExpCtor = new ScriptFunction("RegExp", &native_RegExp_ctor, true);
    auto sharedProto = getRegExpProto();

    // constructor -> prototype, then prototype -> constructor
    regExpCtor["prototype"] = sharedProto;
    regExpCtor["prototype"]["constructor"] = regExpCtor;

    interpreter.forceSetGlobal("RegExp", regExpCtor, false);
}
216 
/**
 * Get the RegExp prototype. This is public because the VM needs it.
 *
 * The prototype is built on the first call and cached in _regExpProto; later calls return the
 * cached object.
 */
ScriptObject getRegExpProto()
{
    // Already built: hand back the cached object.
    if(_regExpProto !is null)
        return _regExpProto;

    _regExpProto = new ScriptObject("RegExp", null);
    auto proto = _regExpProto;

    // Properties. lastIndex uses one native function for both reading and writing.
    proto.addGetterProperty("flags",
            new ScriptFunction("RegExp.prototype.flags", &native_RegExp_p_flags));
    proto.addGetterProperty("lastIndex",
            new ScriptFunction("RegExp.prototype.lastIndex", &native_RegExp_p_lastIndex));
    proto.addSetterProperty("lastIndex",
            new ScriptFunction("RegExp.prototype.lastIndex", &native_RegExp_p_lastIndex));
    proto.addGetterProperty("source",
            new ScriptFunction("RegExp.prototype.source", &native_RegExp_p_source));

    // Flag queries, registered as ordinary methods.
    proto["dotAll"] = new ScriptFunction("RegExp.prototype.dotAll", &native_RegExp_dotAll);
    proto["global"] = new ScriptFunction("RegExp.prototype.global", &native_RegExp_global);
    proto["ignoreCase"] = new ScriptFunction("RegExp.prototype.ignoreCase", &native_RegExp_ignoreCase);
    proto["multiline"] = new ScriptFunction("RegExp.prototype.multiline", &native_RegExp_multiline);

    // Matching and text manipulation.
    proto["match"] = new ScriptFunction("RegExp.prototype.match", &native_RegExp_match);
    // TODO matchAll
    proto["replace"] = new ScriptFunction("RegExp.prototype.replace", &native_RegExp_replace);
    proto["search"] = new ScriptFunction("RegExp.prototype.search", &native_RegExp_search);
    proto["split"] = new ScriptFunction("RegExp.prototype.split", &native_RegExp_split);
    proto["exec"] = new ScriptFunction("RegExp.prototype.exec", &native_RegExp_exec);
    proto["test"] = new ScriptFunction("RegExp.prototype.test", &native_RegExp_test);

    return proto;
}

// Cache for getRegExpProto; null until the prototype is first requested.
private ScriptObject _regExpProto;
249 
/**
 * Native implementation of the script-side RegExp constructor.
 *
 * args[0] is the pattern and the optional args[1] the flags; both are converted with toString.
 * The resulting ScriptRegExp is stored as the native object of thisObj. Does nothing when
 * thisObj is not an object. A std.regex.RegexException is reported to the script by returning
 * its message with nfe set to RETURN_VALUE_IS_EXCEPTION.
 */
ScriptAny native_RegExp_ctor(Environment env, ScriptAny* thisObj, ScriptAny[] args, ref NativeFunctionError nfe)
{
    if(!thisObj.isObject)
        return ScriptAny.UNDEFINED;

    auto target = thisObj.toValue!ScriptObject;
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }

    immutable hasFlags = args.length > 1;
    auto patternText = args[0].toString();
    auto flagText = hasFlags ? args[1].toString() : "";

    try 
    {
        target.nativeObject = new ScriptRegExp(patternText, flagText);
        return ScriptAny.UNDEFINED;
    }
    catch(std.regex.RegexException compileError)
    {
        // Surface the compilation failure to the script as an exception value.
        nfe = NativeFunctionError.RETURN_VALUE_IS_EXCEPTION;
        return ScriptAny(compileError.msg);
    }
}
273 
// Getter behind RegExp.prototype.flags: returns the flags of the ScriptRegExp held by thisObj,
// or flags WRONG_TYPE_OF_ARG when thisObj holds no ScriptRegExp.
private ScriptAny native_RegExp_p_flags(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    if(auto self = thisObj.toNativeObject!ScriptRegExp)
        return ScriptAny(self.flags);

    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
285 
// Getter and setter behind RegExp.prototype.lastIndex. With no argument the current value is
// returned; with one, args[0] is converted to size_t, stored, and returned. Flags
// WRONG_TYPE_OF_ARG when thisObj holds no ScriptRegExp.
private ScriptAny native_RegExp_p_lastIndex(Environment env, ScriptAny* thisObj,
                                            ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }

    if(args.length >= 1)
    {
        // Setter path.
        immutable requested = args[0].toValue!size_t;
        return ScriptAny(self.lastIndex = requested);
    }

    // Getter path.
    return ScriptAny(self.lastIndex);
}
300 
// Getter behind RegExp.prototype.source: returns the pattern text of the ScriptRegExp held by
// thisObj, or flags WRONG_TYPE_OF_ARG when thisObj holds no ScriptRegExp.
private ScriptAny native_RegExp_p_source(Environment env, ScriptAny* thisObj,
                                         ScriptAny[] args, ref NativeFunctionError nfe)
{
    if(auto self = thisObj.toNativeObject!ScriptRegExp)
        return ScriptAny(self.source);

    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
312 
// RegExp.prototype.dotAll: reports whether the 's' flag was used. Flags WRONG_TYPE_OF_ARG when
// thisObj holds no ScriptRegExp.
private ScriptAny native_RegExp_dotAll(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.dotAll());

    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
324 
// RegExp.prototype.global: reports whether the 'g' flag was used. Flags WRONG_TYPE_OF_ARG when
// thisObj holds no ScriptRegExp.
private ScriptAny native_RegExp_global(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.global());

    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
336 
// RegExp.prototype.ignoreCase: reports whether the 'i' flag was used. Flags WRONG_TYPE_OF_ARG
// when thisObj holds no ScriptRegExp.
private ScriptAny native_RegExp_ignoreCase(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.ignoreCase());

    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
348 
// RegExp.prototype.multiline: reports whether the 'm' flag was used. Flags WRONG_TYPE_OF_ARG
// when thisObj holds no ScriptRegExp.
private ScriptAny native_RegExp_multiline(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self !is null)
        return ScriptAny(self.multiline());

    nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
    return ScriptAny.UNDEFINED;
}
360 
// RegExp.prototype.match: converts args[0] to a string and returns the matches that
// ScriptRegExp.match collects from it. Flags WRONG_TYPE_OF_ARG when thisObj holds no
// ScriptRegExp and WRONG_NUMBER_OF_ARGS when no argument is given.
private ScriptAny native_RegExp_match(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length == 0)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }

    auto subject = args[0].toString();
    return ScriptAny(self.match(subject));
}
379 
380 // TODO matchAll once iterators are implemented
381 
// RegExp.prototype.replace: args[0] is the text to work on and args[1] the replacement format;
// both are converted with toString and handed to ScriptRegExp.replace. Flags WRONG_TYPE_OF_ARG
// when thisObj holds no ScriptRegExp and WRONG_NUMBER_OF_ARGS for fewer than two arguments.
private ScriptAny native_RegExp_replace(Environment env, ScriptAny* thisObj,
                                        ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length <= 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }

    auto subject = args[0].toString();
    auto replacement = args[1].toString();
    auto replaced = self.replace(subject, replacement);
    return ScriptAny(replaced);
}
400 
// RegExp.prototype.search: converts args[0] to a string and returns the result of
// ScriptRegExp.search on it. Flags WRONG_TYPE_OF_ARG when thisObj holds no ScriptRegExp and
// WRONG_NUMBER_OF_ARGS when no argument is given.
private ScriptAny native_RegExp_search(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length == 0)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }

    auto subject = args[0].toString();
    return ScriptAny(self.search(subject));
}
418 
// RegExp.prototype.split: converts args[0] to a string and returns the pieces produced by
// ScriptRegExp.split. Flags WRONG_TYPE_OF_ARG when thisObj holds no ScriptRegExp and
// WRONG_NUMBER_OF_ARGS when no argument is given.
private ScriptAny native_RegExp_split(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length == 0)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }

    auto subject = args[0].toString();
    return ScriptAny(self.split(subject));
}
436 
// RegExp.prototype.exec: converts args[0] to a string and returns the result of a single
// ScriptRegExp.exec call on it. Flags WRONG_TYPE_OF_ARG when thisObj holds no ScriptRegExp and
// WRONG_NUMBER_OF_ARGS when no argument is given.
private ScriptAny native_RegExp_exec(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto regExp = thisObj.toNativeObject!ScriptRegExp;
    if(regExp is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length < 1)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }
    auto str = args[0].toString();
    // exec advances the regex's lastIndex, so it must run exactly once per script call; running
    // it a second time would skip a match and return the following one instead.
    return ScriptAny(regExp.exec(str));
}
455 
// RegExp.prototype.test: converts args[0] to a string and returns the boolean produced by
// ScriptRegExp.test. Flags WRONG_TYPE_OF_ARG when thisObj holds no ScriptRegExp and
// WRONG_NUMBER_OF_ARGS when no argument is given.
private ScriptAny native_RegExp_test(Environment env, ScriptAny* thisObj,
                                       ScriptAny[] args, ref NativeFunctionError nfe)
{
    auto self = thisObj.toNativeObject!ScriptRegExp;
    if(self is null)
    {
        nfe = NativeFunctionError.WRONG_TYPE_OF_ARG;
        return ScriptAny.UNDEFINED;
    }
    if(args.length == 0)
    {
        nfe = NativeFunctionError.WRONG_NUMBER_OF_ARGS;
        return ScriptAny.UNDEFINED;
    }

    auto subject = args[0].toString();
    return ScriptAny(self.test(subject));
}
473 
// Walks every "foo" in the sample text with repeated exec calls on a global pattern, then
// checks where a non-global pattern first finds "bar".
unittest
{
    import std.stdio: writeln;
    auto testString = "foo bar foo bar foo";
    auto testRegexp = new ScriptRegExp("foo", "g");
    auto rg2 = new ScriptRegExp("bar");

    size_t matchCount = 0;
    auto result = testRegexp.exec(testString);
    assert(result.length > 0);
    while(result.length > 0)
    {
        // The pattern has no capture groups, so each result is just the matched text.
        assert(result == ["foo"]);
        ++matchCount;
        writeln(result);
        result = testRegexp.exec(testString);
    }
    // The sample text contains exactly three occurrences of "foo".
    assert(matchCount == 3);

    immutable firstBar = rg2.search(testString);
    writeln(firstBar);
    assert(firstBar == 4);
}