#!/usr/bin/env python3

# Check Python version.
import sys
if sys.version_info < (3,7):
    print('While emo.py avoids recent Python features to maximize compatibility,')
    print('it does require 3.7 or later. Please upgrade your Python.')
    sys.exit(1)

# -------------------------------------------------------------------------
# (C) Copyright 2023 by Robert Grimm, released under the Apache 2.0 license
# -------------------------------------------------------------------------

from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from itertools import chain
import json
import os
from pathlib import Path
import re
import shutil
import subprocess
from typing import (
    Any, Dict, Iterable, KeysView, List, Mapping, NoReturn, Optional, TextIO,
    Tuple, Union
)
from urllib.request import urlopen


# --------------------------------------------------------------------------------------
# Provide a simple console logger

class Logger:
    """A minimal console logger that styles output only when writing to a TTY."""

    def __init__(self, out: TextIO = sys.stderr) -> None:
        self._out = out
        self._first_header = True

    def sgr(self, open: str, text: str, close: str) -> str:
        """Wrap the text in ANSI SGR escape sequences if the output is a terminal."""
        if self._out.isatty():
            return f'\x1b[{open}m{text}\x1b[{close}m'
        return text

    def pln(self, text: str = '') -> None:
        """Print the text followed by a newline."""
        print(text, file=self._out)

    def header(self, text: str) -> None:
        """Print a bold header, separated by a blank line from prior headers."""
        if self._first_header:
            self._first_header = False
        else:
            self.pln()
        self.pln(self.sgr('1', text, '0'))

    def detail(self, text: str) -> None:
        """Print an indented detail line."""
        self.pln(f' {text}')

    def error(self, text: str) -> None:
        """Print an error message."""
        self.pln(self.sgr('1;31', f'ERROR: {text}', '0;39'))

    def warning(self, text: str) -> None:
        """Print a warning message."""
        self.pln(self.sgr('1;38;5;208', f'WARNING: {text}', '0;39'))

    def info(self, text: str) -> None:
        """Print an informational message."""
        self.pln(self.sgr('1;34', f'INFO: {text}', '0;39'))


logger = Logger()


# --------------------------------------------------------------------------------------
# Build a self-contained `demo.html`

# NOTE(review): The markup inside this pattern (and inside the `<style>` wrapper
# used by make_demo() below) was missing from the reviewed copy of this file.
# Only the group name `sheet` is certain, since make_demo() reads it. Please
# confirm the pattern against the `<link>` elements LaTeXML actually emits.
STYLE_LINK = re.compile(
    r'<link\s+rel="stylesheet"\s+href="(?P<sheet>[^"]+)"[^>]*>'
)


def make_demo() -> None:
    """
    Convert `demo.tex` into the self-contained `demo.html`.

    The function runs `latexmlc`, inlines every linked style sheet into the
    resulting HTML, and removes the intermediate files afterwards. It raises
    `subprocess.CalledProcessError` if the conversion fails.
    """
    # Write extra styles to file.
    logger.info('Writing auxiliary style sheet "demo.css"')
    with open('demo.css', mode='w', encoding='utf8') as file:
        file.write('\n:root {\n    font-size: 300%;\n}\n')

    # Convert to HTML and read in result. Fail right here if the tool fails
    # instead of stumbling over a missing or stale output file below.
    logger.info('Converting LaTeX source in "demo.tex" to HTML in "demo.tmp.html"')
    subprocess.run(
        ['latexmlc', '--css=demo.css', '--destination=demo.tmp.html', 'demo.tex'],
        check=True,
    )
    with open('demo.tmp.html', mode='r', encoding='utf8') as file:
        content = file.read()

    # Find all links to style sheets and replace them with CSS content.
    style_sheets = []
    fragments = []
    last_index = 0
    for link in STYLE_LINK.finditer(content):
        fragments.append(content[last_index:link.start()])

        style_sheet = link.group('sheet')
        style_sheets.append(style_sheet)
        logger.info(f'Loading style sheet "{style_sheet}"')
        with open(style_sheet, mode='r', encoding='utf8') as file:
            css = file.read()
        if not css.startswith('\n'):
            css = f'\n{css}'

        fragments.append(f'<style>{css}</style>')
        last_index = link.end()
    fragments.append(content[last_index:])

    # Write result and clean up.
    logger.info('Writing self-contained HTML document "demo.html"')
    with open('demo.html', mode='w', encoding='utf8') as file:
        file.writelines(fragments)

    os.unlink('demo.tmp.html')
    for style_sheet in style_sheets:
        os.unlink(style_sheet)


# --------------------------------------------------------------------------------------
# Build an archive for release

EMO_FILES = (
    'emo.def',
    'emo.dtx',
    'emo.pdf',
    'emo-lingchi.ttf',
    'README.md',
    'config/emo.py',
    'config/emoji-test.txt'
)

EMO_GRAPHICS = 'emo-graphics'

# The group name `version` is required by make_release(). The names `date` and
# `description` are not read anywhere in this file.
EMO_METADATA = re.compile(
    r"""
        ^[ ]{4}\[
        (?P<date>\d{4}/\d{1,2}/\d{1,2})
        [ ]
        v(?P<version>\d+\.\d+)
        [ ]
        (?P<description>[^\]]+)
        \]
    """,
    re.VERBOSE | re.MULTILINE
)


def make_release() -> None:
    """
    Stage all release files in directory `emo` and zip them up.

    The version number is extracted from `emo.dtx` in the repository root,
    which is the parent of this script's directory. The function raises a
    `ValueError` if the metadata is missing or if the archive or the staging
    directory already exist.
    """
    # Determine repository root.
    source = Path(__file__).parent.parent

    # Determine package metadata.
    metadata = EMO_METADATA.search((source / 'emo.dtx').read_text(encoding='utf8'))
    if metadata is None:
        raise ValueError(f'Package metadata missing from "{source / "emo.dtx"}"')
    version = metadata.group('version')
    logger.info(f'Preparing release {version} for "{source}"')

    # Make sure no archive exists.
    archive = source / f'emo-{version}.zip'
    if archive.exists():
        raise ValueError(
            f'Archive file "{archive}" already exists, please move out of way.'
        )

    # Set up staging directory.
    staging = source / 'emo'
    if staging.exists():
        raise ValueError(
            f'Staging directory "{staging}" already exists, please move out of way.'
        )
    logger.info(f'Creating staging directory "{staging}"')
    staging.mkdir()

    # Process all files belonging into release.
    graphics = source.glob(f'{EMO_GRAPHICS}/emo-*.pdf')
    for path in chain(map(Path, EMO_FILES), graphics):
        path = source / path

        # To stage a file in the repository root, just copy it.
        if path.parent == source:
            logger.info(f'Staging "{path.name}"')
            shutil.copy(path, staging)
            continue

        # To stage a file in a subdirectory, get relative path and, if necessary,
        # recreate path in staging area. Then copy into that subdirectory.
        nested_staging = staging / path.parent.relative_to(source)
        if not nested_staging.exists():
            logger.info(f'Creating nested staging directory "{nested_staging}"')
            nested_staging.mkdir(parents=True)

        logger.info(f'Staging "{path.relative_to(source)}"')
        shutil.copy(path, nested_staging)

    # Create archive.
    shutil.make_archive(
        str(archive.with_suffix('')),
        'zip',
        root_dir=staging.parent,
        base_dir=staging.name,
    )


# --------------------------------------------------------------------------------------
# Normalize emoji names

PUNCTUATION = re.compile(r"""["'’“”&!(),:]""")
SEPARATORS = re.compile(r'[ _\-]+')

# The list of name overrides.
RENAMING = {
    'a-button-blood-type': 'a-button',
    'ab-button-blood-type': 'ab-button',
    'b-button-blood-type': 'b-button',
    'o-button-blood-type': 'o-button',
    'bust-in-silhouette': 'bust',
    'busts-in-silhouette': 'busts',
    'flag-european-union': 'eu',
    'globe-showing-americas': 'globe-americas',
    'globe-showing-asia-australia': 'globe-asia-australia',
    'globe-showing-europe-africa': 'globe-africa-europe',
    'hear-no-evil-monkey': 'hear-no-evil',
    'index-pointing-at-the-viewer': 'index-pointing-at-viewer',
    'index-pointing-at-the-viewer-darkest': 'index-pointing-at-viewer-darkest',
    'index-pointing-at-the-viewer-darker': 'index-pointing-at-viewer-darker',
    'index-pointing-at-the-viewer-medium': 'index-pointing-at-viewer-medium',
    'index-pointing-at-the-viewer-lighter': 'index-pointing-at-viewer-lighter',
    'index-pointing-at-the-viewer-lightest': 'index-pointing-at-viewer-lightest',
    'keycap-*': 'keycap-star',
    'keycap-#': 'keycap-hash',
    'keycap-0': 'keycap-zero',
    'keycap-1': 'keycap-one',
    'keycap-2': 'keycap-two',
    'keycap-3': 'keycap-three',
    'keycap-4': 'keycap-four',
    'keycap-5': 'keycap-five',
    'keycap-6': 'keycap-six',
    'keycap-7': 'keycap-seven',
    'keycap-8': 'keycap-eight',
    'keycap-9': 'keycap-nine',
    'keycap-10': 'keycap-ten',
    'magnifying-glass-tilted-left': 'loupe-left',
    'magnifying-glass-tilted-right': 'loupe-right',
    'palm-down-hand': 'palm-down',
    'palm-down-hand-darkest': 'palm-down-darkest',
    'palm-down-hand-darker': 'palm-down-darker',
    'palm-down-hand-medium': 'palm-down-medium',
    'palm-down-hand-lighter': 'palm-down-lighter',
    'palm-down-hand-lightest': 'palm-down-lightest',
    'palm-up-hand': 'palm-up',
    'palm-up-hand-darkest': 'palm-up-darkest',
    'palm-up-hand-darker': 'palm-up-darker',
    'palm-up-hand-medium': 'palm-up-medium',
    'palm-up-hand-lighter': 'palm-up-lighter',
    'palm-up-hand-lightest': 'palm-up-lightest',
    'rolling-on-the-floor-laughing': 'rofl',
    'see-no-evil-monkey': 'see-no-evil',
    'speak-no-evil-monkey': 'speak-no-evil',
}


def to_name(value: str) -> str:
    """
    Turn the given string into an emoji name.

    The result is lower case, has punctuation removed, uses dashes as
    separators, uses simplified skin tone indicators, and has any override
    from `RENAMING` applied.
    """
    name = value.lower()
    name = PUNCTUATION.sub('', name)
    name = SEPARATORS.sub('-', name)

    # Use simpler skin tone indicators. Do not reorder: The longer names
    # contain the shorter ones and hence must be replaced first.
    name = name.replace('medium-dark-skin-tone', 'darker')
    name = name.replace('medium-light-skin-tone', 'lighter')
    name = name.replace('medium-skin-tone', 'medium')
    name = name.replace('dark-skin-tone', 'darkest')
    name = name.replace('light-skin-tone', 'lightest')

    return RENAMING.get(name, name)


# --------------------------------------------------------------------------------------
# Handle emoji codepoints

def to_codepoint(cp: Union[int, str]) -> int:
    """Convert an integer or a hex string, optionally with `0x` or `U+`, to an int."""
    if isinstance(cp, int):
        return cp
    if cp.startswith(('0x', 'U+')):
        cp = cp[2:]
    return int(cp, base=16)


def to_codepoints(value: Union[str, Iterable[Union[int,str]]]) -> Tuple[int, ...]:
    """
    Convert the value to a tuple of codepoints.

    A string is treated as the emoji itself and converted character by
    character. Any other iterable is treated as a sequence of integers or
    hex strings.
    """
    if isinstance(value, str):
        return tuple(ord(c) for c in value)
    return tuple(to_codepoint(cp) for cp in value)


REGIONAL_INDICATOR_A = 0x1f1e6
REGIONAL_INDICATOR_Z = 0x1f1ff
LETTER_CAPITAL_A = ord('A')


def is_regional_indicator(cp: int) -> bool:
    """Determine whether the codepoint is a regional indicator symbol."""
    return REGIONAL_INDICATOR_A <= cp <= REGIONAL_INDICATOR_Z


def regional_indicator_to_letter(cp: int) -> str:
    """Map a regional indicator symbol to the corresponding capital letter."""
    return chr(cp - REGIONAL_INDICATOR_A + LETTER_CAPITAL_A)


# --------------------------------------------------------------------------------------
# Normalize emoji group and subgroup names

AMPERSAND = re.compile('[ ]*&[ ]*')

SHORT_GROUPS = {
    'animals': 'animals-and-nature',
    'body': 'people-and-body',
    'drink': 'food-and-drink',
    'emotion': 'smileys-and-emotion',
    'food': 'food-and-drink',
    'nature': 'animals-and-nature',
    'people': 'people-and-body',
    'places': 'travel-and-places',
    'smileys': 'smileys-and-emotion',
    'travel': 'travel-and-places',
}


def to_group(group: str) -> str:
    """Normalize a group name, expanding short names to full group names."""
    group = group.lower()
    group = SHORT_GROUPS.get(group, group)
    return AMPERSAND.sub('-and-', group)


def to_subgroup(subgroup: str) -> str:
    """Normalize a subgroup name."""
    subgroup = subgroup.lower()
    return AMPERSAND.sub('-and-', subgroup)


def to_group_subgroup(group: str, subgroup: str) -> Tuple[str, str]:
    """Normalize a group and subgroup name."""
    return to_group(group), to_subgroup(subgroup)


def is_subgroup_selector(identifier: str) -> bool:
    """Determine whether the identifier has the form `group::subgroup`."""
    return '::' in identifier


def split_subgroup_selector(identifier: str) -> List[str]:
    """Split a lower-cased selector at each double colon."""
    return identifier.lower().split('::')


# --------------------------------------------------------------------------------------
# Define emoji status

class Status(str, Enum):
    """The qualification status of an emoji sequence in `emoji-test.txt`."""
    COMPONENT = 'component'
    FULLY_QUALIFIED = 'fully-qualified'
    MINIMALLY_QUALIFIED = 'minimally-qualified'
    UNQUALIFIED = 'unqualified'


# --------------------------------------------------------------------------------------
# Define emoji descriptor

@dataclass(frozen=True, order=True)
class Emoji:
    """
    Representation of an emoji. The status is optional to allow for quick hacks.

    Only the codepoints participate in comparisons. In other words, two
    emoji with the same codepoints are equal, no matter their names.
    """
    name: str = field(compare=False)
    codepoints: Tuple[int,...]
    display: str = field(init=False, compare=False)
    status: Optional[Status] = field(default=None, compare=False)
    version: Optional[float] = field(default=None, compare=False)

    def __post_init__(self) -> None:
        # The class is frozen, hence bypass the disabled attribute assignment.
        display = ''.join(map(chr, self.codepoints))
        object.__setattr__(self, 'display', display)

    @classmethod
    def of(
        cls,
        name: str,
        value: Union[str, Iterable[Union[int,str]]],
        status: Union[str, Status, None] = None,
        version: Union[str, float, None] = None,
    ) -> 'Emoji':
        """Create a new emoji after normalizing name, codepoints, status, and version."""
        if status is not None and not isinstance(status, Status):
            status = Status(status)
        if isinstance(version, str):
            version = float(version)
        return cls(to_name(name), to_codepoints(value), status, version)

    def __str__(self) -> str:
        return self.display

    def __repr__(self) -> str:
        if self.status is None:
            return f'Emoji.of("{self.name}", "{self.display}")'
        else:
            return f'Emoji.of("{self.name}", "{self.display}", "{self.status.value}")'

    @property
    def has_compound_name(self) -> bool:
        return '-' in self.name

    @property
    def is_regional_flag(self) -> bool:
        return (
            len(self.codepoints) == 2
            and all(is_regional_indicator(cp) for cp in self.codepoints)
        )

    @property
    def is_component(self) -> bool:
        return self.status is Status.COMPONENT

    @property
    def is_fully_qualified(self) -> bool:
        return self.status is Status.FULLY_QUALIFIED

    @property
    def unicode(self) -> str:
        return ' '.join(f'U+{cp:04X}' for cp in self.codepoints)

    @property
    def latex_chars(self) -> str:
        # A raw string keeps the backslash without relying on Python passing
        # through the invalid escape sequence `\c`.
        return ''.join(rf'\char"{cp:04X}' for cp in self.codepoints)

    @property
    def svg_file(self) -> str:
        # Emoji for national flags leverage the country's ISO 3166-1 alpha-2 code.
        if self.is_regional_flag:
            return ''.join(
                regional_indicator_to_letter(cp) for cp in self.codepoints
            ) + '.svg'

        # Skip Emoji presentation selector.
        codepoints = '_'.join(f'{cp:04x}' for cp in self.codepoints if cp != 0xFE0F)
        return f'emoji_u{codepoints}.svg'

    @property
    def svg_path(self) -> str:
        if self.is_regional_flag:
            return f'third_party/regional-flags/svg/{self.svg_file}'
        return f'svg/{self.svg_file}'

    @property
    def pdf_file(self) -> str:
        return f'emo-{self.name}.pdf'

    @property
    def latex_table_entry(self) -> str:
        # Names with dashes cannot be part of a control sequence name written
        # as `\name` and hence are defined via `\csname`. Raw strings keep the
        # backslashes without relying on invalid escape sequences.
        if self.has_compound_name:
            prefix = rf'\expandafter\def\csname emo@emoji@{self.name}\endcsname'
        else:
            prefix = rf'\def\emo@emoji@{self.name}'
        return f'{prefix}{{{str(self)}}}'


# --------------------------------------------------------------------------------------
# Parse Unicode TR-51's `emoji-test.txt`

NameTable = Mapping[str, Emoji]
CodepointTable = Mapping[Tuple[int, ...], Emoji]
SubgroupTable = Mapping[str, Tuple[Emoji, ...]]
GroupTable = Mapping[str, SubgroupTable]


class RegistryParser:
    """
    Parser for the
    `[emoji-test.txt](https://www.unicode.org/Public/emoji/latest/emoji-test.txt)`
    file accompanying [Unicode TR-51](https://www.unicode.org/reports/tr51/). It
    is the most complete listing of Unicode emoji sequences and names and
    conveniently also organizes them into meaningful groups and subgroups.

    The `run()` method returns three tables:

     1. The name table maps emoji names to Emoji instances (component and
        fully qualified only).
     2. The codepoint table maps Unicode code sequences (fully qualified and
        otherwise) to Emoji instances (component and fully qualified only).
     3. The group table maps group names to subgroup names to sequences of
        Emoji instances. For group "component," those emoji have component
        status. For all other groups, they are fully qualified.
    """

    def __init__(self, path: Union[str, Path]) -> None:
        self._path: Union[str, Path] = path
        self._lineno = 0
        self._name_table: Dict[str, Emoji] = {}
        self._codepoint_table: Dict[Tuple[int, ...], Emoji] = {}
        self._group_table: Dict[str, Dict[str, Tuple[Emoji, ...]]] = {}
        self._group_name: Optional[str] = None
        self._group: Optional[Dict[str, Tuple[Emoji, ...]]] = None
        self._subgroup_name: Optional[str] = None
        self._subgroup: Optional[List[Emoji]] = None

    def error(self, msg: str) -> NoReturn:
        """Raise a `ValueError` that identifies the current file and line."""
        raise ValueError(f'{self._path}:{self._lineno}: {msg}')

    GROUP_PREFIX = '# group: '
    SUBGROUP_PREFIX = '# subgroup: '

    # The group names `codepoints`, `status`, `version`, and `name` are
    # required by parse_line(). The group `emoji` is not read.
    EMOJI_DECLARATION = re.compile(r"""
        ^
        (?P<codepoints>[0-9A-F][0-9A-F ]+[0-9A-F])
        [ ]+ [;] [ ]
        (?P<status>component|fully-qualified|minimally-qualified|unqualified)
        [ ]+ [#] [ ]
        (?P<emoji>[^ ]+)
        [ ]
        [E](?P<version>[0-9.]+)
        [ ]
        (?P<name>.+)
        $
    """, re.X)

    def parse_line(self, line: str) -> Union[Emoji, Tuple[str, str], None]:
        """
        Parse a single line.

        The result is a `('group', name)` or `('subgroup', name)` pair for a
        group or subgroup declaration, an emoji for an emoji declaration, and
        `None` for empty lines and other comments.
        """
        line = line.strip()

        # Group and subgroup are specified in comments.
        if line.startswith(self.GROUP_PREFIX):
            return 'group', to_group(line[len(self.GROUP_PREFIX):])
        if line.startswith(self.SUBGROUP_PREFIX):
            return 'subgroup', to_subgroup(line[len(self.SUBGROUP_PREFIX):])
        if line == '' or line[0] == '#':
            return None

        match = self.EMOJI_DECLARATION.match(line)
        if match is None:
            self.error('neither empty, comment, or emoji')

        return Emoji.of(
            match.group('name'),
            match.group('codepoints').split(),
            match.group('status'),
            match.group('version')
        )

    def enter_group(self, name: str) -> None:
        """Make the named group the current group."""
        assert self._subgroup_name is None
        self._group_name = name
        self._group = self._group_table.setdefault(name, {})

    def enter_subgroup(self, name: str) -> None:
        """Make the named subgroup of the current group the current subgroup."""
        assert self._subgroup_name is None
        if self._group is None:
            self.error('subgroup without prior group declaration')
        self._subgroup_name = name
        self._subgroup = list(self._group[name]) if name in self._group else []

    def maybe_exit_subgroup(self) -> None:
        """Store the current subgroup, if there is one, in the current group."""
        if self._subgroup_name is not None:
            assert self._group is not None
            assert self._subgroup is not None
            self._group[self._subgroup_name] = tuple(self._subgroup)
            self._subgroup_name = None
            self._subgroup = None

    def add_emoji(self, emoji: Emoji) -> None:
        """Validate the emoji and record it in the tables."""
        # There must be a group and subgroup.
        if self._subgroup_name is None:
            self.error('emoji without prior group and subgroup declaration')

        # Only register emoji with new codepoints.
        if emoji.codepoints in self._codepoint_table:
            self.error(
                f'duplicate emoji by codepoints {emoji.display} ({emoji.unicode})'
            )

        # Only register component and fully qualified emoji with new names.
        if emoji.is_component and emoji.name in self._name_table:
            self.error(
                'duplicate declaration of component '
                f'emoji by name {emoji.display} ({emoji.unicode})'
            )
        if emoji.is_fully_qualified and emoji.name in self._name_table:
            self.error(
                'duplicate declaration of fully qualified '
                f'emoji by name {emoji.display} ({emoji.unicode})'
            )

        # Only the component group contains only component emoji.
        if self._group_name == 'component' and not emoji.is_component:
            self.error(
                'component group with non-component '
                f'emoji {emoji.display} ({emoji.unicode})'
            )
        if emoji.is_component and self._group_name != 'component':
            self.error(
                f'component emoji {emoji.display} ({emoji.unicode}) '
                'outside component group'
            )

        # Record all emoji by codepoints.
        self._codepoint_table[emoji.codepoints] = emoji

        # Record component and fully qualified emoji also by name and group/subgroup.
        if emoji.is_component or emoji.is_fully_qualified:
            self._name_table[emoji.name] = emoji
            assert self._subgroup is not None
            self._subgroup.append(emoji)

    def run(self) -> Tuple[NameTable, CodepointTable, GroupTable]:
        """
        Parse the file and return the name, codepoint, and group tables.

        The method may be invoked only once per parser. It raises a
        `ValueError` upon encountering malformed or inconsistent content.
        """
        assert self._lineno == 0

        with open(self._path, mode='r', encoding='utf8') as file:
            for line in file:
                self._lineno += 1

                # parse_line() strips surrounding whitespace including the line
                # terminator. Do not slice off the last character here, since
                # the final line of a file need not end with a newline.
                item = self.parse_line(line)
                if item is None:
                    continue
                if isinstance(item, Emoji):
                    self.add_emoji(item)
                    continue

                self.maybe_exit_subgroup()
                grouping, name = item
                if grouping == 'group':
                    self.enter_group(name)
                else:
                    self.enter_subgroup(name)

        self.maybe_exit_subgroup()

        # Patch non-component identifiers to point to fully qualified emoji
        # descriptors. Replacing values of existing keys is safe during iteration.
        for identifier, emoji in self._codepoint_table.items():
            if emoji.is_component or emoji.is_fully_qualified:
                continue

            fully_qualified_emoji = self._name_table.get(emoji.name)
            if fully_qualified_emoji is None:
                self.error(
                    f'no fully qualified emoji for {emoji.display} ({emoji.unicode})'
                )
            self._codepoint_table[identifier] = fully_qualified_emoji

        return self._name_table, self._codepoint_table, self._group_table


# --------------------------------------------------------------------------------------
# Maintain emoji registry

class Registry:
    """A registry of emoji by name, by codepoints, and by group and subgroup."""

    def __init__(
        self,
        name_table: NameTable,
        codepoint_table: CodepointTable,
        group_table: GroupTable
    ) -> None:
        """Create a new emoji registry. Use `from_file()` instead."""
        self._name_table = name_table
        self._codepoint_table = codepoint_table
        self._group_table = group_table

    @classmethod
    def from_file(cls, path: Union[str, Path]) -> 'Registry':
        """Instantiate a new registry instance from the given file."""
        return cls(*RegistryParser(path).run())

    def emoji_names(self) -> KeysView[str]:
        """Get the names of all registered emoji."""
        return self._name_table.keys()

    def lookup(self, identifier: Union[str, Tuple[int, ...]]) -> Optional[Emoji]:
        """Look up an emoji by name or codepoints."""
        if isinstance(identifier, str):
            return self._name_table.get(identifier.lower())
        return self._codepoint_table.get(identifier)

    def is_group(self, group: str) -> bool:
        """Determine if the group name is valid."""
        return group in self._group_table

    def is_subgroup(self, group: str, subgroup: str) -> bool:
        """Determine if the subgroup name is valid. The group name must be valid."""
        return subgroup in self._group_table[group]

    def group_names(self) -> KeysView[str]:
        """Get the names of all groups."""
        return self._group_table.keys()

    def subgroup_names(self, group: str) -> KeysView[str]:
        """Get the names of all subgroups."""
        return self._group_table[group].keys()

    def subgroup(self, group: str, subgroup: str) -> Tuple[Emoji, ...]:
        """Get the subgroup of the group."""
        return self._group_table[group][subgroup]

    def subgroup_from_selector(self, selector: str) -> Tuple[Emoji, ...]:
        """Get the subgroup for the given `group::subgroup` selector."""
        names = split_subgroup_selector(selector)
        if len(names) != 2:
            raise KeyError(f'selector "{selector}" does not combine two names')
        group, subgroup = to_group_subgroup(*names)
        if not self.is_group(group):
            raise KeyError(f'selector "{selector}" names non-existent group')
        if not self.is_subgroup(group, subgroup):
            raise KeyError(f'selector "{selector}" names non-existent subgroup')
        return self.subgroup(group, subgroup)

    def select(self, *selectors: str) -> List[Emoji]:
        """Get the emoji matching the given selectors."""
        selection: List[Emoji] = []

        for selector in selectors:
            # 'ALL' -- all emoji
            if selector == 'ALL':
                for group in self.group_names():
                    for subgroup in self.subgroup_names(group):
                        selection.extend(self.subgroup(group, subgroup))
                continue

            # group::subgroup -- all emoji in the subgroup
            if is_subgroup_selector(selector):
                selection.extend(self.subgroup_from_selector(selector))
                continue

            # name -- all emoji in the group, if it exists
            group = to_group(selector)
            if self.is_group(group):
                for subgroup in self.subgroup_names(group):
                    selection.extend(self.subgroup(group, subgroup))
                continue

            # name -- the named emoji, if it exists
            name = selector.lower()
            if name in self._name_table:
                selection.append(self._name_table[name])
                continue

            raise KeyError(f'selector "{selector}" names neither emoji nor group')

        return selection

    def dump(self, file: Optional[TextIO] = None) -> None:
        """Dump the registry contents by groups and subgroups."""
        if file is None:
            file = sys.stdout

        for group in self.group_names():
            for subgroup in self.subgroup_names(group):
                file.write(group)
                file.write('∷')
                file.write(subgroup)
                file.write(' ≡ ')
                file.write(''.join(e.display for e in self.subgroup(group, subgroup)))
                file.write('\n')


# --------------------------------------------------------------------------------------
# Download Noto emoji sources

NOTO_REPOSITORY = 'https://github.com/googlefonts/noto-emoji/archive/refs/heads/main.zip'


def is_valid_noto_emoji(noto_path: Path) -> bool:
    """
    Determine whether the path holds the Noto emoji sources.

    The function returns `False` if the path does not exist. It raises a
    `ValueError` if the path exists but is not a directory or lacks the
    expected entries.
    """
    if not noto_path.exists():
        return False
    if not noto_path.is_dir():
        raise ValueError(
            f'The Noto emoji path "{noto_path}" is not even a directory. '
            'Please move file out of the way or change path with --noto-emoji.'
        )

    entries = set(entry.name for entry in noto_path.iterdir())
    if (
        'colrv1' in entries
        and 'svg' in entries
        and 'third_party' in entries
        and 'emoji_aliases.txt' in entries
    ):
        return True

    raise ValueError(
        f'The Noto emoji path "{noto_path}" points to a directory without '
        'expected contents. Please move directory out of the way or change '
        'path with --noto-emoji'
    )


def ensure_local_noto_emoji(noto_path: Path, verbose: bool = False) -> None:
    """Download and unpack the Noto emoji sources unless they already exist."""
    if is_valid_noto_emoji(noto_path):
        if verbose:
            logger.info(f'Seemingly valid Noto emoji sources at "{noto_path}"')
        return

    noto_zip = noto_path.with_name('noto-emoji.zip')
    if not noto_zip.exists():
        if verbose:
            logger.info(f'Downloading Noto emoji sources from "{NOTO_REPOSITORY}"')

        # Download into a temporary file and rename upon completion, so that an
        # interrupted download is not mistaken for the archive on the next run.
        partial_zip = noto_zip.with_name(noto_zip.name + '.part')
        with urlopen(NOTO_REPOSITORY) as response, \
                open(partial_zip, mode='wb') as file:
            shutil.copyfileobj(response, file)
        partial_zip.replace(noto_zip)

    # With archive representing main branch, it is unpacked into
    # noto-emoji-main. We fix that after unpacking.
    if verbose:
        logger.info(f'Unpacking Noto emoji sources into "{noto_path}"')
    shutil.unpack_archive(noto_zip, noto_path.parent, 'zip')
    noto_path.with_name('noto-emoji-main').rename(noto_path)


# --------------------------------------------------------------------------------------
# Convert SVG to PDF

def remove_page_group_object(document: dict) -> Optional[dict]:
    """
    Remove the /Page /Group object from the document in qpdf's JSON format.

    The function modifies the document in place and returns it. It returns
    `None` if the single page has no /Group entry. It raises a `KeyError` for
    dangling references and a `ValueError` for objects of unexpected type and
    for documents that do not have exactly one page.
    """
    objects = document['qpdf'][1]

    def resolve(ref: str) -> Any:
        key = ref if ref == 'trailer' else f'obj:{ref}'
        if key not in objects:
            raise KeyError(ref)
        return objects[key]

    def resolve_value(ref: str, expected_type: Optional[str] = None) -> Any:
        o = resolve(ref)
        if 'value' not in o:
            raise ValueError(f'{ref} does not reference object')
        v = o['value']
        if expected_type is not None and v.get('/Type') != expected_type:
            # Use get() since a missing /Type is one way of ending up here.
            raise ValueError(
                f'{ref} references object of type {v.get("/Type")} '
                f'not {expected_type}'
            )
        return v

    trailer = resolve_value('trailer')
    root = resolve_value(trailer['/Root'], '/Catalog')
    pages = resolve_value(root['/Pages'], '/Pages')['/Kids']
    if len(pages) != 1:
        raise ValueError(f'PDF has {len(pages)} pages instead of just one')
    page = resolve_value(pages[0], '/Page')
    if '/Group' not in page:
        return None
    del page['/Group']
    return document


def remove_page_group(path: Path) -> bool:
    """
    Remove the /Page /Group object from the qpdf JSON file with the given path.

    The function rewrites the file and returns `True` if the object was
    present. Otherwise, it leaves the file alone and returns `False`.
    """
    with open(path, mode='r', encoding='utf8') as file:
        document = json.load(file)
    document = remove_page_group_object(document)
    if document is None:
        return False
    tmp = path.with_suffix('.patched.json')
    with open(tmp, mode='w', encoding='utf8') as file:
        json.dump(document, file)
    tmp.replace(path)
    return True


def fix_pdf(qpdf: str, path: Path) -> None:
    """
    Remove the /Page /Group object from the PDF file with the given path.

    The function converts the PDF to qpdf's JSON format, patches the JSON,
    and converts back to PDF. The intermediate JSON file is removed in all
    cases, including when the PDF needs no patching or a step fails.
    """
    json_path = path.with_suffix('.json')
    try:
        subprocess.run(
            [qpdf, str(path), '--json-output', str(json_path)], check=True
        )
        if not remove_page_group(json_path):
            return
        tmp = path.with_suffix('.patched.pdf')
        subprocess.run([qpdf, str(json_path), '--json-input', str(tmp)], check=True)
        tmp.replace(path)
    finally:
        # Path.unlink(missing_ok=True) requires Python 3.8.
        if json_path.exists():
            json_path.unlink()


def convert_svg_to_pdf(rsvg_convert: str, source: Path, target: Path) -> None:
    """Convert the SVG source file into the PDF target file."""
    subprocess.run(
        [rsvg_convert, str(source), '-f', 'Pdf', '-o', str(target)], check=True
    )


def which(tool: str) -> str:
    """Get the path to the tool or raise a `FileNotFoundError`."""
    path = shutil.which(tool)
    if path is None:
        raise FileNotFoundError(tool)
    return path


@dataclass(frozen=True)
class Converter:
    """A callable that converts an emoji's SVG source into a PDF graphic."""
    qpdf: str
    rsvg_convert: str
    source_dir: Path
    target_dir: Path

    @classmethod
    def create(
        cls,
        source_dir: Union[Path, str],
        target_dir: Union[Path, str],
    ) -> 'Converter':
        """Create a new converter after locating the necessary tools."""
        return cls(
            qpdf = which('qpdf'),
            rsvg_convert = which('rsvg-convert'),
            source_dir = Path(source_dir),
            target_dir = Path(target_dir),
        )

    def __call__(self, emoji: 'Emoji', verbose: bool = False) -> Path:
        """Convert the emoji unless the PDF file exists already and return its path."""
        source = self.source_dir / emoji.svg_path
        target = self.target_dir / emoji.pdf_file

        if not target.exists():
            if verbose:
                logger.info(f'Converting "{source}" to "{target}"')
            convert_svg_to_pdf(self.rsvg_convert, source, target)
            if verbose:
                logger.info(f'Fixing /Page /Group in "{target}"')
            fix_pdf(self.qpdf, target)

        return target


# --------------------------------------------------------------------------------------
# Provide tool help and command line options

DESCRIPTION = """
Generate emoji table and PDF files for the given selectors. A selector may be a
group name, a group and subgroup name with a double colon and no spaces between
them, an emoji name, or `ALL` for all emoji.

With some exceptions, an emoji's name is the emoji's Unicode name with
punctuation stripped, spaces replaced by dashes, and skin tone modifiers
simplified to `darkest`, `darker`, `medium`, `lighter`, and `lightest` (instead
of `dark-skin-tone`, `medium-dark-skin-tone`, `medium-skin-tone`,
`medium-light-skin-tone`, and `light-skin-tone`).

If suitably named PDF files exist in the graphics directory, they are not
recreated but included in the emoji table.
"""


def resolved_path(path: str) -> Path:
    """Convert the string into an absolute path."""
    return Path(path).resolve()


def create_parser() -> ArgumentParser:
    """Create the parser for this tool's command line arguments."""
    parser = ArgumentParser(
        description=DESCRIPTION,
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='do not write to file system'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='enable verbose mode'
    )
    parser.add_argument(
        '--registry',
        type=resolved_path,
        default='config/emoji-test.txt',
        metavar='PATH',
        help='use path for file with Unicode emoji sequences',
    )
    parser.add_argument(
        '--noto-emoji',
        type=resolved_path,
        default='noto-emoji',
        metavar='PATH',
        help='use path for directory with Noto color emoji sources',
    )
    parser.add_argument(
        '--graphics',
        type=resolved_path,
        default='emo-graphics',
        metavar='PATH',
        help='use path for directory with generated PDF graphics',
    )
    parser.add_argument(
        '--latex-table',
        type=resolved_path,
        default='emo.def',
        metavar='PATH',
        help='use path for file with LaTeX emoji table',
    )

    # The options below each select a mode of operation and hence exclude each other.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--show-group-names',
        action='store_true',
        help='show supported group, subgroup names and exit',
    )
    group.add_argument(
        '--show-emoji-names',
        action='store_true',
        help='show supported emoji names and exit',
    )
    group.add_argument(
        '--show-special-names',
        action='store_true',
        help='show map from (simplified) Unicode names to emoji names and exit'
    )
    group.add_argument(
        '--show-names',
        action='store_true',
        help='show group, emoji, as well as special names and exit'
    )
    group.add_argument(
        '--make-demo',
        action='store_true',
        help='make the demo document and exit',
    )
    group.add_argument(
        '-r', '--make-release',
        action='store_true',
        help='make a release and exit',
    )
    parser.add_argument(
        'selectors',
        nargs='*',
        help='names of emoji groups or emoji',
    )
    return parser


# --------------------------------------------------------------------------------------
# Show group, emoji, and special names

def show_names(registry: Registry, options: Any) -> bool:
    """Show the names requested by the options. Return `True` if any were shown."""
    showed_something = False

    if options.show_group_names or options.show_names:
        logger.header('Supported groups and subgroups:')
        for group in registry.group_names():
            for subgroup in registry.subgroup_names(group):
                logger.detail(f'{group}::{subgroup}')
        showed_something = True

    if options.show_emoji_names or options.show_names:
        logger.header('Supported emoji names:')
        for name in sorted(registry.emoji_names()):
            logger.detail(f'{name}')
        showed_something = True

    if options.show_special_names or options.show_names:
        logger.header('Map from (simplified) Unicode to (special) emoji names:')
        for unicode, selector in RENAMING.items():
            logger.detail(f'{unicode:40s} ▶ {selector}')
        showed_something = True

    return showed_something


# --------------------------------------------------------------------------------------
# Create emoji inventory

SPECIAL_FILES = ('emo-lingchi.pdf', 'emo-YHWH.pdf')


def create_inventory(registry: Registry, options: Any) -> List[Emoji]:
    """
    Determine the emoji with existing PDF files in the graphics directory.

    The function raises a `FileNotFoundError` if any of the special files is
    missing from that directory.
    """
    specials = list(SPECIAL_FILES)
    inventory: List[Emoji] = []

    if options.graphics.exists() and options.graphics.is_dir():
        for entry in options.graphics.iterdir():
            if not entry.is_file() or not entry.match('emo-*.pdf'):
                continue
            if entry.name in SPECIAL_FILES:
                specials.remove(entry.name)
                continue

            # Strip the `emo-` prefix to recover the emoji name.
            emoji = registry.lookup(entry.stem[4:])
            if emoji is not None:
                inventory.append(emoji)
            elif options.verbose:
                logger.warning(f'"{entry.name}" does not depict an emoji')

    if len(specials) == 1:
        raise FileNotFoundError(f'PDF graphic "emo-graphics/{specials[0]}" is missing!')
    elif len(specials) == 2:
        raise FileNotFoundError(
            f'PDF graphics "{specials[0]}" and "{specials[1]}" '
            'in "emo-graphics" are missing!'
        )

    return inventory


# --------------------------------------------------------------------------------------
# Write emoji table

def write_emoji_table(
    requested_emoji: List[Emoji], existing_emoji: List[Emoji], options: Any
) -> List[Emoji]:
    """
    Write the LaTeX table for the requested as well as the existing emoji.

    The function returns the sorted list of all emoji without duplicates. It
    does not touch the file system in dry run mode.
    """
    all_emoji = sorted(set(requested_emoji) | set(existing_emoji))

    # Write to a temporary file first and replace the table upon completion.
    tmp_table = options.latex_table.with_suffix('.latest.def')
    if not options.dry_run:
        with open(tmp_table, mode='w', encoding='utf8') as file:
            today = datetime.today().strftime('%Y-%m-%d')
            file.write(f'\\ProvidesFile{{emo.def}}[{today}]\n')

            for emoji in all_emoji:
                file.write(emoji.latex_table_entry)
                file.write('\n')

            file.write(
                '\n'
                '\\ifemo@extra\n'
                '\\def\\emo@emoji@lingchi{凌遲}\n'
                '\\def\\emo@emoji@YHWH{\\begingroup\\textdir TRT יהוה\\endgroup}\n'
                '\\fi\n'
            )

        tmp_table.replace(options.latex_table)

    return all_emoji


# --------------------------------------------------------------------------------------
# Run this script

def main() -> None:
    """
    Run the tool with the command line arguments.

    Upon failure, the function logs the error and exits the process with
    status 1, so that shell scripts and build tools can detect the failure.
    """
    try:
        # Parse command line options.
        options = create_parser().parse_args()

        # Create release.
        if (options.make_release or options.make_demo) and options.dry_run:
            raise ValueError('Unable to dry run selected build function')
        elif options.make_release:
            make_release()
            return
        elif options.make_demo:
            make_demo()
            return

        # Populate registry, maybe list names.
        registry = Registry.from_file(options.registry)
        if show_names(registry, options):
            return

        # Determine requested emoji.
        requested_emoji = registry.select(*options.selectors)

        # Ensure directory for PDF graphics exists and create converter.
        if not options.dry_run:
            options.graphics.mkdir(parents=True, exist_ok=True)
        convert = Converter.create(options.noto_emoji, options.graphics)

        # Create inventory of existing emoji.
        existing_emoji = create_inventory(registry, options)

        # Download Noto emoji sources if they haven't been before.
        if not options.dry_run:
            ensure_local_noto_emoji(options.noto_emoji, options.verbose)

        # Convert requested emoji, which does not recreate existing emoji.
        if not options.dry_run:
            for emoji in requested_emoji:
                convert(emoji, options.verbose)

        # Write the emoji table for all emoji.
        all_emoji = write_emoji_table(requested_emoji, existing_emoji, options)
        if options.verbose:
            logger.info('Supported emoji: ' + ' '.join(e.display for e in all_emoji))

    except Exception as x:
        logger.error(str(x))
        sys.exit(1)


if __name__ == '__main__':
    main()