Add a new and improved mapmerge (#33869)

Key benefits of the new mapmerge include: multi-Z support, effective reuse of deleted keys, automatic handling of key overflow, and utilizing a git pre-commit hook to eliminate the need to run batch files manually.
2025-02-26 09:04:50 +00:00 · 2017-12-28 13:01:34 -08:00
parent d95d4cb9d6
commit 9639061433
14 changed files with 801 additions and 0 deletions
--- a/tools/mapmerge2/dmm.py
+++ b/tools/mapmerge2/dmm.py
@@ -0,0 +1,459 @@
+# Tools for working with DreamMaker maps
+
+import io
+import bidict
+import random
+from collections import namedtuple
+
+TGM_HEADER = "//MAP CONVERTED BY dmm2tgm.py THIS HEADER COMMENT PREVENTS RECONVERSION, DO NOT REMOVE"
+ENCODING = 'utf-8'
+
+Coordinate = namedtuple('Coordinate', ['x', 'y', 'z'])
+
+class DMM:
+    __slots__ = ['key_length', 'size', 'dictionary', 'grid', 'header']
+
+    def __init__(self, key_length, size):
+        self.key_length = key_length
+        self.size = size
+        self.dictionary = bidict.bidict()
+        self.grid = {}
+        self.header = None
+
+    @staticmethod
+    def from_file(fname):
+        # stream the file rather than forcing all its contents to memory
+        with open(fname, 'r', encoding=ENCODING) as f:
+            return _parse(iter(lambda: f.read(1), ''))
+
+    @staticmethod
+    def from_bytes(bytes):
+        return _parse(bytes.decode(ENCODING))
+
+    def to_file(self, fname, tgm = True):
+        with open(fname, 'w', newline='\n', encoding=ENCODING) as f:
+            (save_tgm if tgm else save_dmm)(self, f)
+
+    def to_bytes(self, tgm = True):
+        bio = io.BytesIO()
+        with io.TextIOWrapper(bio, newline='\n', encoding=ENCODING) as f:
+            (save_tgm if tgm else save_dmm)(self, f)
+            f.flush()
+            return bio.getvalue()
+
+    def generate_new_key(self):
+        # ensure that free keys exist by increasing the key length if necessary
+        free_keys = (BASE ** self.key_length) - len(self.dictionary)
+        while free_keys <= 0:
+            self.key_length += 1
+            free_keys = (BASE ** self.key_length) - len(self.dictionary)
+
+        # choose one of the free keys at random
+        key = 0
+        while free_keys:
+            if key not in self.dictionary:
+                # this construction is used to avoid needing to construct the
+                # full set in order to random.choice() from it
+                if random.random() < 1 / free_keys:
+                    return key
+                free_keys -= 1
+            key += 1
+
+        raise RuntimeError("ran out of keys, this shouldn't happen")
+
+    @property
+    def coords_zyx(self):
+        for z in range(1, self.size.z + 1):
+            for y in range(1, self.size.y + 1):
+                for x in range(1, self.size.x + 1):
+                    yield (z, y, x)
+
+    @property
+    def coords_z(self):
+        return range(1, self.size.z + 1)
+
+    @property
+    def coords_yx(self):
+        for y in range(1, self.size.y + 1):
+            for x in range(1, self.size.x + 1):
+                yield (y, x)
+
+# ----------
+# key handling
+
+# Base 52 a-z A-Z dictionary for fast conversion
+BASE = 52
+base52 = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+base52_r = {x: i for i, x in enumerate(base52)}
+assert len(base52) == BASE and len(base52_r) == BASE
+
+def key_to_num(key):
+    num = 0
+    for ch in key:
+        num = BASE * num + base52_r[ch]
+    return num
+
+def num_to_key(num, key_length):
+    if num >= BASE ** key_length:
+        raise KeyTooLarge(f"num={num} does not fit in key_length={key_length}")
+
+    result = ''
+    while num:
+        result = base52[num % BASE] + result
+        num //= BASE
+
+    assert len(result) <= key_length
+    return base52[0] * (key_length - len(result)) + result
+
+class KeyTooLarge(Exception):
+    pass
+
+# ----------
+# An actual atom parser
+
+def parse_map_atom(atom):
+    try:
+        i = atom.index('{')
+    except ValueError:
+        return atom, {}
+
+    path, rest = atom[:i], atom[i+1:]
+    vars = {}
+
+    in_string = False
+    in_name = False
+    escaping = False
+    current_name = ''
+    current = ''
+    for ch in rest:
+        if escaping:
+            escaping = False
+            current += ch
+        elif ch == '\\':
+            escaping = True
+        elif ch == '"':
+            in_string = not in_string
+            current += ch
+        elif in_string:
+            current += ch
+        elif ch == ';':
+            vars[current_name.strip()] = current.strip()
+            current_name = current = ''
+        elif ch == '=':
+            current_name = current
+            current = ''
+        elif ch == '}':
+            vars[current_name.strip()] = current.strip()
+            break
+        elif ch not in ' ':
+            current += ch
+
+    return path, vars
+
+# ----------
+# TGM writer
+
+def save_tgm(dmm, output):
+    output.write(f"{TGM_HEADER}\n")
+    if dmm.header:
+        output.write(f"{dmm.header}\n")
+
+    # write dictionary in tgm format
+    for key, value in sorted(dmm.dictionary.items()):
+        output.write(f'"{num_to_key(key, dmm.key_length)}" = (\n')
+        for idx, thing in enumerate(value):
+            in_quote_block = False
+            in_varedit_block = False
+            for char in thing:
+                if in_quote_block:
+                    if char == '"':
+                        in_quote_block = False
+                    output.write(char)
+                elif char == '"':
+                    in_quote_block = True
+                    output.write(char)
+                elif not in_varedit_block:
+                    if char == "{":
+                        in_varedit_block = True
+                        output.write("{\n\t")
+                    else:
+                        output.write(char)
+                elif char == ";":
+                    output.write(";\n\t")
+                elif char == "}":
+                    output.write("\n\t}")
+                    in_varedit_block = False
+                else:
+                    output.write(char)
+            if idx < len(value) - 1:
+                output.write(",\n")
+        output.write(")\n")
+
+    # thanks to YotaXP for finding out about this one
+    max_x, max_y, max_z = dmm.size
+    for z in range(1, max_z + 1):
+        output.write("\n")
+        for x in range(1, max_x + 1):
+            output.write(f"({x},{1},{z}) = {{\"\n")
+            for y in range(1, max_y + 1):
+                output.write(f"{num_to_key(dmm.grid[x, y, z], dmm.key_length)}\n")
+            output.write("\"}\n")
+
+# ----------
+# DMM writer
+
+def save_dmm(dmm, output):
+    if dmm.header:
+        output.write(f"{dmm.header}\n")
+
+    # writes a tile dictionary the same way Dreammaker does
+    for key, value in sorted(dmm.dictionary.items()):
+        output.write(f'"{num_to_key(key, dmm.key_length)}" = ({",".join(value)})\n')
+
+    output.write("\n")
+
+    # writes a map grid the same way Dreammaker does
+    max_x, max_y, max_z = dmm.size
+    for z in range(1, max_z + 1):
+        output.write(f"(1,1,{z}) = {{\"\n")
+
+        for y in range(1, max_y + 1):
+            for x in range(1, max_x + 1):
+                try:
+                    output.write(num_to_key(dmm.grid[x, y, z], dmm.key_length))
+                except KeyError:
+                    print(f"Key error: ({x}, {y}, {z})")
+            output.write("\n")
+        output.write("\"}\n")
+
+# ----------
+# Parser
+
+def _parse(map_raw_text):
+    in_comment_line = False
+    comment_trigger = False
+
+    in_quote_block = False
+    in_key_block = False
+    in_data_block = False
+    in_varedit_block = False
+    after_data_block = False
+    escaping = False
+    skip_whitespace = False
+
+    dictionary = bidict.bidict()
+    duplicate_keys = {}
+    curr_key_len = 0
+    curr_key = 0
+    curr_datum = ""
+    curr_data = list()
+
+    in_map_block = False
+    in_coord_block = False
+    in_map_string = False
+    iter_x = 0
+    adjust_y = True
+
+    curr_num = ""
+    reading_coord = "x"
+
+    key_length = 0
+
+    maxx = 0
+    maxy = 0
+    maxz = 0
+
+    curr_x = 0
+    curr_y = 0
+    curr_z = 0
+    grid = dict()
+
+    it = iter(map_raw_text)
+
+    # map block
+    for char in it:
+        if char == "\n":
+            in_comment_line = False
+            comment_trigger = False
+            continue
+        elif in_comment_line:
+            continue
+        elif char == "\t":
+            continue
+
+        if char == "/" and not in_quote_block:
+            if comment_trigger:
+                in_comment_line = True
+                continue
+            else:
+                comment_trigger = True
+        else:
+            comment_trigger = False
+
+        if in_data_block:
+
+            if in_varedit_block:
+
+                if in_quote_block:
+                    if char == "\\":
+                        curr_datum = curr_datum + char
+                        escaping = True
+
+                    elif escaping:
+                        curr_datum = curr_datum + char
+                        escaping = False
+
+                    elif char == "\"":
+                        curr_datum = curr_datum + char
+                        in_quote_block = False
+
+                    else:
+                        curr_datum = curr_datum + char
+
+                else:
+                    if skip_whitespace and char == " ":
+                        skip_whitespace = False
+                        continue
+                    skip_whitespace = False
+
+                    if char == "\"":
+                        curr_datum = curr_datum + char
+                        in_quote_block = True
+
+                    elif char == ";":
+                        skip_whitespace = True
+                        curr_datum = curr_datum + char
+
+                    elif char == "}":
+                        curr_datum = curr_datum + char
+                        in_varedit_block = False
+
+                    else:
+                        curr_datum = curr_datum + char
+
+            elif char == "{":
+                curr_datum = curr_datum + char
+                in_varedit_block = True
+
+            elif char == ",":
+                curr_data.append(curr_datum)
+                curr_datum = ""
+
+            elif char == ")":
+                curr_data.append(curr_datum)
+                curr_data = tuple(curr_data)
+                try:
+                    dictionary[curr_key] = curr_data
+                except bidict.ValueDuplicationError:
+                    # if the map has duplicate values, eliminate them now
+                    duplicate_keys[curr_key] = dictionary.inv[curr_data]
+                curr_data = list()
+                curr_datum = ""
+                curr_key = 0
+                curr_key_len = 0
+                in_data_block = False
+                after_data_block = True
+
+            else:
+                curr_datum = curr_datum + char
+
+        elif in_key_block:
+            if char == "\"":
+                in_key_block = False
+                if key_length == 0:
+                    key_length = curr_key_len
+                else:
+                    assert key_length == curr_key_len
+            else:
+                curr_key = BASE * curr_key + base52_r[char]
+                curr_key_len += 1
+
+        # else we're looking for a key block, a data block or the map block
+        elif char == "\"":
+            in_key_block = True
+            after_data_block = False
+
+        elif char == "(":
+            if after_data_block:
+                in_coord_block = True
+                after_data_block = False
+                curr_key = 0
+                curr_key_len = 0
+                break
+            else:
+                in_data_block = True
+                after_data_block = False
+
+    # grid block
+    for char in it:
+        if in_coord_block:
+            if char == ",":
+                if reading_coord == "x":
+                    curr_x = int(curr_num)
+                    if curr_x > maxx:
+                        maxx = curr_x
+                    iter_x = 0
+                    curr_num = ""
+                    reading_coord = "y"
+                elif reading_coord == "y":
+                    curr_y = int(curr_num)
+                    if curr_y > maxy:
+                        maxy = curr_y
+                    curr_num = ""
+                    reading_coord = "z"
+                else:
+                    raise ValueError("too many dimensions")
+
+            elif char == ")":
+                curr_z = int(curr_num)
+                if curr_z > maxz:
+                    maxz = curr_z
+                in_coord_block = False
+                reading_coord = "x"
+                curr_num = ""
+
+            else:
+                curr_num = curr_num + char
+
+        elif in_map_string:
+            if char == "\"":
+                in_map_string = False
+                adjust_y = True
+                curr_y -= 1
+
+            elif char == "\n":
+                if adjust_y:
+                    adjust_y = False
+                else:
+                    curr_y += 1
+                if curr_x > maxx:
+                    maxx = curr_x
+                if iter_x > 1:
+                    curr_x = 1
+                iter_x = 0
+
+            else:
+                curr_key = BASE * curr_key + base52_r[char]
+                curr_key_len += 1
+                if curr_key_len == key_length:
+                    iter_x += 1
+                    if iter_x > 1:
+                        curr_x += 1
+
+                    grid[curr_x, curr_y, curr_z] = duplicate_keys.get(curr_key, curr_key)
+                    curr_key = 0
+                    curr_key_len = 0
+
+        # else look for coordinate block or a map string
+        elif char == "(":
+            in_coord_block = True
+        elif char == "\"":
+            in_map_string = True
+
+    if curr_y > maxy:
+        maxy = curr_y
+
+    data = DMM(key_length, Coordinate(maxx, maxy, maxz))
+    data.dictionary = dictionary
+    data.grid = grid
+    return data