Bygex - Regex implementation for byond.

Source code for the dll can be found here under the LGPL license: https://github.com/Carn/bygex It is merely an interface to boost::regex and uses Perl syntax (non-recursive). Guidelines for use: var/datum/regex/Rgx = regex_findall(haystack, regex_expression) for(var/i=1, i<=Rgx.matches.len, ++i) world << Rgx.str(i) This implementation differs from the one other regex-in-byond library I found: it only returns a string containing the position and length of each match and submatch. This uses far less memory than also passing back the matched strings (especially in large files like dream-maker maps). Note: Regex indexes begin at 1 (since byond lists begin at 1), unlike traditional regex. If we are using a procedure such as regex_findall() with 3 sub-expressions, match 1 will be the overall match, match 2 will be subexpression 1, match 3 will be subexpression 2, match 4 will be subexpression 3, and match 5 will be the overall match of the next match, and so on. To ease use, there is /datum/regex/var/anchors, a numerical value which can be used inside loops like so: for(var/i=1, i<=Rgx.matches.len, i+=Rgx.anchors) world << Rgx.str(i+1) This will print the first submatch of each match to world.
2025-12-10 18:22:39 +00:00 · 2013-11-21 18:54:01 +00:00
parent 60f644c401
commit a9bea16ac8
5 changed files with 112 additions and 29 deletions
--- a/baystation12.dme
+++ b/baystation12.dme
@@ -16,6 +16,7 @@
 #include "code\setup.dm"
 #include "code\stylesheet.dm"
 #include "code\world.dm"
+#include "code\__HELPERS\bygex.dm"
 #include "code\__HELPERS\files.dm"
 #include "code\__HELPERS\game.dm"
 #include "code\__HELPERS\global_lists.dm"
--- a/baystation12.int
+++ b/baystation12.int
@@ -1,6 +1,9 @@
 // BEGIN_INTERNALS
 /*
 MAP_ICON_TYPE: 0
+WINDOW: code\__HELPERS\bygex.dm
+DIR: code code\__HELPERS 
+FILE: code\__HELPERS\bygex.dm
 AUTO_FILE_DIR: OFF
 */
 // END_INTERNALS
--- a/bygex.dll
+++ b/bygex.dll
--- a/code/__HELPERS/bygex.dm
+++ b/code/__HELPERS/bygex.dm
@@ -0,0 +1,107 @@
+#ifndef LIBREGEX_LIBRARY
+	#define LIBREGEX_LIBRARY "bygex"
+#endif
+
+//Thin wrappers over the LIBREGEX_LIBRARY dll. "regEx_" variants are presumably case-sensitive (cf. findtextEx) - confirm against the dll.
+//compare: passes str and exp to the dll's compare function and wraps the reply in a /datum/regex
+/proc/regEx_compare(str, exp)
+	return new /datum/regex(str, exp, call(LIBREGEX_LIBRARY, "regEx_compare")(str, exp))
+/proc/regex_compare(str, exp)
+	return new /datum/regex(str, exp, call(LIBREGEX_LIBRARY, "regex_compare")(str, exp))
+
+//find: passes str and exp to the dll's find function and wraps the reply in a /datum/regex
+/proc/regEx_find(str, exp)
+	return new /datum/regex(str, exp, call(LIBREGEX_LIBRARY, "regEx_find")(str, exp))
+/proc/regex_find(str, exp)
+	return new /datum/regex(str, exp, call(LIBREGEX_LIBRARY, "regex_find")(str, exp))
+
+//replaceall: returns the dll's reply as-is (no /datum/regex); fmt is the replacement handed to the dll
+/proc/regEx_replaceall(str, exp, fmt)
+	return call(LIBREGEX_LIBRARY, "regEx_replaceall")(str, exp, fmt)
+/proc/regex_replaceall(str, exp, fmt)
+	return call(LIBREGEX_LIBRARY, "regex_replaceall")(str, exp, fmt)
+
+//replacetext: routed to the dll's *_replaceallliteral functions; presumably exp and fmt are taken literally - confirm
+/proc/replacetextEx(str, exp, fmt)
+	return call(LIBREGEX_LIBRARY, "regEx_replaceallliteral")(str, exp, fmt)
+/proc/replacetext(str, exp, fmt)
+	return call(LIBREGEX_LIBRARY, "regex_replaceallliteral")(str, exp, fmt)
+
+//replace: returns the dll's reply as-is (no /datum/regex); counterpart of replaceall using the dll's replace function
+/proc/regEx_replace(str, exp, fmt)
+	return call(LIBREGEX_LIBRARY, "regEx_replace")(str, exp, fmt)
+/proc/regex_replace(str, exp, fmt)
+	return call(LIBREGEX_LIBRARY, "regex_replace")(str, exp, fmt)
+
+//findall: passes str and exp to the dll's findall function and wraps the reply in a /datum/regex
+/proc/regEx_findall(str, exp)
+	return new /datum/regex(str, exp, call(LIBREGEX_LIBRARY, "regEx_findall")(str, exp))
+/proc/regex_findall(str, exp)
+	return new /datum/regex(str, exp, call(LIBREGEX_LIBRARY, "regex_findall")(str, exp))
+
+
+//Result object built by the regEx_*/regex_* compare and find procs. New() stores str (haystack) and exp (needle)
+//and parses the dll's reply into matches, a list of /datum/match objects that hold only a position and a length.
+//Matched strings are not returned from the dll, in order to save on memory allocation for large numbers of strings;
+//use regex.str(matchnum) to cut the text out of the haystack as needed, and regex.pos(matchnum), regex.len(matchnum)
+//and regex.end(matchnum) as shorthands. A reply that begins with "Err" is stored in error and matches stays empty.
+/datum/regex
+	var/str	//haystack handed to New()
+	var/exp	//expression handed to New()
+	var/error	//the dll's reply when it began with "Err"; null otherwise
+	var/anchors = 0	//number of /datum/match entries built from the last group parsed; stays 0 when nothing was parsed
+	var/list/matches = list()	//flat list of /datum/match, appended in the order the reply was parsed
+
+	New(str, exp, results)
+		src.str = str
+		src.exp = exp
+
+		if(findtext(results, "Err", 1, 4))	//reply starts with "Err": treat the whole reply as an error message
+			src.error = results
+		else
+			var/list/L = params2list(results)
+			var/list/M
+			var{i;j}
+			for(i in L)
+				M = L[i]	//presumably a list of alternating pos,len values for one overall match - confirm against the dll
+				for(j=2, j<=M.len, j+=2)
+					matches += new /datum/match(text2num(M[j-1]),text2num(M[j]))
+			if(j)	anchors = (j-2)/2	//j is still null when the reply held no groups; the unguarded formula then produced -1
+		return matches	//kept as in the original; NOTE(review): `new` appears to discard this value - confirm
+
+	proc
+		str(i)	//text of match i, or the whole haystack when i is 0/null
+			if(!i)	return str
+			var/datum/match/M = matches[i]
+			return copytext(str, M.pos, M.pos+M.len)
+
+		pos(i)	//start position of match i, or 1 when i is 0/null
+			if(!i)	return 1
+			var/datum/match/M = matches[i]
+			return M.pos
+
+		len(i)	//length of match i, or the haystack length when i is 0/null
+			if(!i)	return length(str)
+			var/datum/match/M = matches[i]
+			return M.len
+
+		end(i)	//pos+len of match i, i.e. the position just past it
+			if(!i) return length(str)	//NOTE(review): this is the last position, not one past it like pos+len - confirm intent
+			var/datum/match/M = matches[i]
+			return M.pos + M.len
+
+		report()	//debug tool: builds a text summary of str, exp, anchors and either the error or every match
+			. = ":: RESULTS ::\n:: str :: [html_encode(str)]\n:: exp :: [html_encode(exp)]\n:: anchors :: [anchors]"
+			if(error)
+				. += "\n<font color='red'>[error]</font>"
+				return
+			for(var/i=1, i<=matches.len, ++i)
+				. += "\nMatch[i]\n\t[html_encode(str(i))]\n\tpos=[pos(i)] len=[len(i)]"
+
+//one (pos, len) pair as stored in /datum/regex.matches; it carries no text of its own
+/datum/match
+	var{pos; len}
+
+/datum/match/New(pos, len)
+	src.len = len
+	src.pos = pos
--- a/code/__HELPERS/text.dm
+++ b/code/__HELPERS/text.dm
@@ -194,35 +194,7 @@ proc/checkhtml(var/t)
 /*
 * Text modification
 */
-/proc/replacetext(text, find, replacement)
-	var/find_len = length(find)
-	if(find_len < 1)	return text
-	. = ""
-	var/last_found = 1
-	while(1)
-		var/found = findtext(text, find, last_found, 0)
-		. += copytext(text, last_found, found)
-		if(found)
-			. += replacement
-			last_found = found + find_len
-			continue
-		return .
-
-/proc/replacetextEx(text, find, replacement)
-	var/find_len = length(find)
-	if(find_len < 1)	return text
-	. = ""
-	var/last_found = 1
-	while(1)
-		var/found = findtextEx(text, find, last_found, 0)
-		. += copytext(text, last_found, found)
-		if(found)
-			. += replacement
-			last_found = found + find_len
-			continue
-		return .
-
-//Adds 'u' number of zeros ahead of the text 't'
+ //Adds 'u' number of zeros ahead of the text 't'
 /proc/add_zero(t, u)
 	while (length(t) < u)
 		t = "0[t]"