From 8641eaa5389990686fb5b0449139b264a9b32d65 Mon Sep 17 00:00:00 2001
From: Alan
Date: Tue, 1 Aug 2017 15:34:13 +0000
Subject: [PATCH] Output JSON and work on directories

---
Review notes (free text between the separator above and the diff; not
part of the commit message).

About this copy: the patch was reflowed from a whitespace-collapsed
source. Indentation and blank context lines were reconstructed so that
every hunk matches its @@ line counts and the diffstat. Runs of spaces
inside string literals could not be recovered, so the '-' lines may
not match the pre-image byte for byte (e.g. the " " literals in the
removed indent() helper) - verify before applying.

What the patch does, as visible in the hunks below:
  * Drops Gdk and the ElementTree Element output. parse_single_key()
    now returns the unescaped string, and the per-key attributes
    (align / width / repeatable) are gone.
  * Adds parse_rows(): each key becomes a list holding its value
    followed by its longPress alternatives. Keys are grouped by the
    ISO row letter, rows are emitted in reverse letter order, and keys
    are ordered by their column number.
  * convert_xml() builds a dict with "locale" (the part of the
    keyboard locale before "-t-"), "name", and "levels" (each with
    "level", "mode" and "rows").
  * locale_to_xkb() tries LOCALE_TO_XKB_OVERRIDES, then an exact
    display-name match, then word-by-word matches, and raises KeyError
    when nothing matches or the override is None.
  * convert_file() writes <destination>/<xkb name>.json with
    json.dump(); load_xkb_mappings() maps GnomeDesktop.XkbInfo display
    names to layout ids.
  * The entry point now needs two arguments and accepts either one
    file or a directory, in which case it converts every file that
    matches "*-t-k0-android.xml".

Points to confirm or fix in a follow-up:
  1. Directory mode calls os.chdir(source) before convert_file(), so a
     relative <destination> is then resolved inside the source
     directory rather than where the user ran the script.
  2. locale_to_xkb() reads the global name_to_xkb, which is only
     assigned under the __main__ guard; calling it from an importing
     module raises NameError.
  3. convert_xml() binds a local called "locale", which hides the
     imported locale module inside that function. If the tree has no
     <keyboard> or no <name> element the locals are never bound and
     UnboundLocalError escapes; convert_file() only catches KeyError.
     A <keyboard> without a locale attribute fails on .partition().
  4. parse_rows() uses ISO_PATTERN.match(), which is anchored only at
     the start: a value such as "A01x" passes and int(iso[1:]) then
     raises ValueError. A <map> without an iso attribute passes None
     to match() and raises TypeError.
  5. The override key 'fr_CA' uses an underscore while 'en-GB',
     'es-US' and 'nl-BE' use a hyphen - presumably a typo that keeps
     that override from ever matching; confirm against the CLDR
     locale ids.
  6. convert_file() returns False on failure but None on success.
  7. logging.warn() is a deprecated alias of logging.warning().
  8. Typo in the KeyError text: "explicitely" -> "explicitly".
  9. "locale in sorted(LOCALE_TO_XKB_OVERRIDES.keys())" sorts only to
     test membership; "locale in LOCALE_TO_XKB_OVERRIDES" is enough.
 10. Both visible unescape() definitions in parse_single_key() call
     chr(), so the hasattr(__builtins__, 'unichr') test looks dead.
     One pre-image line between the second and third hunk is not
     shown (presumably "else:") - confirm.
 11. The conditions that choose 'default' / 'latched' / 'locked' fall
     between the third and fourth hunk and are not shown, so the mode
     heuristic was not reviewed.

 convert_cldr.py | 230 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 156 insertions(+), 74 deletions(-)

diff --git a/convert_cldr.py b/convert_cldr.py
index 8ecc213a5..7420f7cda 100755
--- a/convert_cldr.py
+++ b/convert_cldr.py
@@ -1,7 +1,8 @@
 #!/usr/bin/python3
-
+#
 # Copyright 2015 Daiki Ueno
 #           2016 Parag Nemade
+#           2017 Alan
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as
@@ -17,17 +18,36 @@
 # License along with this program; if not, see
 # .
 
-from gi.repository import Gdk
-from xml.etree.ElementTree import ElementTree, Element
+import glob
+import json
+import locale
+import logging
+import os
 import re
+import sys
+import xml.etree.ElementTree
+
+import gi
+gi.require_version('GnomeDesktop', '3.0')  # NOQA: E402
+from gi.repository import GnomeDesktop
 
 ESCAPE_PATTERN = re.compile(r'\\u\{([0-9A-Fa-f]+?)\}')
 ISO_PATTERN = re.compile(r'[A-E]([0-9]+)')
 
+LOCALE_TO_XKB_OVERRIDES = {
+    'af': 'za',
+    'en': 'us',
+    'en-GB': 'uk',
+    'es-US': 'latam',
+    'fr_CA': 'ca',
+    'hi': 'in+bolnagri',
+    'ky': 'kg',
+    'nl-BE': 'be',
+    'zu': None
+}
+
 
 def parse_single_key(value):
-    key = Element('key')
-    uc = 0
     if hasattr(__builtins__, 'unichr'):
         def unescape(m):
             return chr(int(m.group(1), 16))
@@ -35,22 +55,55 @@ def parse_single_key(value):
         def unescape(m):
             return chr(int(m.group(1), 16))
     value = ESCAPE_PATTERN.sub(unescape, value)
-    if len(value) > 1:
-        key.set('text', value)
-    uc = ord(value[0])
-    keyval = Gdk.unicode_to_keyval(uc)
-    name = Gdk.keyval_name(keyval)
-    key.set('name', name)
-    return key
+    return value
 
 
-def convert(source, tree):
-    root = Element('layout')
+def parse_rows(keymap):
+    unsorted_rows = {}
+    for _map in keymap.iter('map'):
+        value = _map.get('to')
+        key = [parse_single_key(value)]
+        iso = _map.get('iso')
+        if not ISO_PATTERN.match(iso):
+            sys.stderr.write('invalid ISO key name: %s\n' % iso)
+            continue
+        if not iso[0] in unsorted_rows:
+            unsorted_rows[iso[0]] = []
+        unsorted_rows[iso[0]].append((int(iso[1:]), key))
+        # add subkeys
+        longPress = _map.get('longPress')
+        if longPress:
+            for value in longPress.split(' '):
+                subkey = parse_single_key(value)
+                key.append(subkey)
+
+    rows = []
+    for k, v in sorted(list(unsorted_rows.items()),
+                       key=lambda x: x[0],
+                       reverse=True):
+        row = []
+        for key in sorted(v, key=lambda x: x):
+            row.append(key[1])
+        rows.append(row)
+
+    return rows
+
+
+def convert_xml(tree):
+    root = {}
+    for xml_keyboard in tree.iter("keyboard"):
+        locale_full = xml_keyboard.get("locale")
+        locale, sep, end = locale_full.partition("-t-")
+    root["locale"] = locale
+    for xml_name in tree.iter("name"):
+        name = xml_name.get("value")
+    root["name"] = name
+    root["levels"] = []
+    # parse levels
     for index, keymap in enumerate(tree.iter('keyMap')):
-        level = Element('level')
-        rows = {}
-        root.append(level)
-        level.set('name', 'level%d' % (index+1))
+        level = {}
+        root["levels"].append(level)
+        level["level"] = index + 1
         # FIXME: heuristics here
         modifiers = keymap.get('modifiers')
         if not modifiers:
@@ -59,70 +112,99 @@ def convert(source, tree):
             mode = 'latched'
         else:
             mode = 'locked'
-        level.set('mode', mode)
-        for _map in keymap.iter('map'):
-            value = _map.get('to')
-            key = parse_single_key(value)
-            iso = _map.get('iso')
-            if not ISO_PATTERN.match(iso):
-                sys.stderr.write('invalid ISO key name: %s\n' % iso)
-                continue
-            if not iso[0] in rows:
-                rows[iso[0]] = []
-            rows[iso[0]].append((int(iso[1:]), key))
-            # add attribute to certain keys
-            name = key.get('name')
-            if name == 'space':
-                key.set('align', 'center')
-                key.set('width', '6.0')
-            if name in ('space', 'BackSpace'):
-                key.set('repeatable', 'yes')
-            # add subkeys
-            longPress = _map.get('longPress')
-            if longPress:
-                for value in longPress.split(' '):
-                    subkey = parse_single_key(value)
-                    key.append(subkey)
-        for k, v in sorted(list(rows.items()), key=lambda x: x[0], reverse=True):
-            row = Element('row')
-            for key in sorted(v, key=lambda x: x):
-                row.append(key[1])
-            level.append(row)
+        level["mode"] = mode
+        level["rows"] = parse_rows(keymap)
     return root
 
 
-def indent(elem, level=0):
-    i = "\n" + level*" "
-    if len(elem):
-        if not elem.text or not elem.text.strip():
-            elem.text = i + " "
-        if not elem.tail or not elem.tail.strip():
-            elem.tail = i
-        for elem in elem:
-            indent(elem, level+1)
-        if not elem.tail or not elem.tail.strip():
-            elem.tail = i
+def locale_to_xkb(locale, name):
+    if locale in sorted(LOCALE_TO_XKB_OVERRIDES.keys()):
+        xkb = LOCALE_TO_XKB_OVERRIDES[locale]
+        logging.debug("override for %s → %s",
+                      locale, xkb)
+        if xkb:
+            return xkb
+        else:
+            raise KeyError("layout %s explicitely disabled in overrides"
+                           % locale)
+    xkb_names = sorted(name_to_xkb.keys())
+    if name in xkb_names:
+        return name_to_xkb[name]
     else:
-        if level and (not elem.tail or not elem.tail.strip()):
-            elem.tail = i
+        logging.debug("name %s failed" % name)
+    for sub_name in name.split(' '):
+        if sub_name in xkb_names:
+            xkb = name_to_xkb[sub_name]
+            logging.debug("dumb mapping failed but match with locale word: "
+                          "%s (%s) → %s (%s)",
+                          locale, name, xkb, sub_name)
+            return xkb
+        else:
+            logging.debug("sub_name failed")
+    for xkb_name in xkb_names:
+        for xkb_sub_name in xkb_name.split(' '):
+            if xkb_sub_name.strip('()') == name:
+                xkb = name_to_xkb[xkb_name]
+                logging.debug("dumb mapping failed but match with xkb word: "
+                              "%s (%s) → %s (%s)",
+                              locale, name, xkb, xkb_name)
+                return xkb
+    raise KeyError("failed to find XKB mapping for %s" % locale)
+
+
+def convert_file(source_file, destination_path):
+    logging.info("Parsing %s", source_file)
+
+    itree = xml.etree.ElementTree.ElementTree()
+    itree.parse(source_file)
+
+    root = convert_xml(itree)
+
+    try:
+        xkb_name = locale_to_xkb(root["locale"], root["name"])
+    except KeyError as e:
+        logging.warn(e)
+        return False
+    destination_file = os.path.join(destination_path, xkb_name + ".json")
+
+    with open(destination_file, 'w', encoding="utf-8") as dest_fd:
+        json.dump(root, dest_fd, ensure_ascii=False, indent=2, sort_keys=True)
+
+    logging.debug("written %s", destination_file)
+
+
+def load_xkb_mappings():
+    xkb = GnomeDesktop.XkbInfo()
+    layouts = xkb.get_all_layouts()
+    name_to_xkb = {}
+
+    for layout in layouts:
+        name = xkb.get_layout_info(layout).display_name
+        name_to_xkb[name] = layout
+
+    return name_to_xkb
+
 
 if __name__ == "__main__":
-    import sys
+    if "DEBUG" in os.environ:
+        logging.basicConfig(level=logging.DEBUG)
 
-    if len(sys.argv) != 2:
+    locale.setlocale(locale.LC_ALL, "C")
+    name_to_xkb = load_xkb_mappings()
+
+    if len(sys.argv) < 2:
         print("supply a CLDR keyboard file")
         sys.exit(1)
 
-    source = sys.argv[-1]
-    itree = ElementTree()
-    itree.parse(source)
+    if len(sys.argv) < 3:
+        print("supply an output directory")
+        sys.exit(1)
 
-    root = convert(source, itree)
-    indent(root)
-
-    otree = ElementTree(root)
-    if hasattr(sys.stdout, 'buffer'):
-        out = sys.stdout.buffer
-    else:
-        out = sys.stdout
-    otree.write(out, xml_declaration=True, encoding='UTF-8')
+    source = sys.argv[1]
+    destination = sys.argv[2]
+    if os.path.isfile(source):
+        convert_file(source, destination)
+    elif os.path.isdir(source):
+        os.chdir(source)
+        for path in glob.glob("*-t-k0-android.xml"):
+            convert_file(path, destination)