# (c) 2013-2018 Sebastian Humenda
# This code is licenced under the terms of the LGPL-3+, see the file COPYING for
# more details.
"""This module contains functionality to typeset formulas for the usage in a
LaTeX document (e.g. creating the preamble, replacing non-ascii letters) and to
typeset LaTeX formulas in a more readable way as alternate description of the
resulting image. """

import locale

from . import unicode

# Commands which only influence the visual layout of a formula; they are removed
# by `increase_readability`.
FORMATTING_COMMANDS = ['\\ ', '\\,', '\\;', '\\big', '\\Big', '\\left',
                       '\\right', '\\limits']


class DocumentSerializationException(Exception):
    """This error is raised whenever a non-ascii character contained in a
    formula could not be replaced by a LaTeX command.
    It provides the following attributes:
    formula - the formula
    index - position in formula
    upoint - unicode point."""

    def __init__(self, formula, index, upoint):
        self.formula = formula
        self.index = index
        self.upoint = upoint
        super().__init__(formula, index, upoint)

    def __str__(self):
        return ("could not find LaTeX replacement command for unicode "
                "character %d, index %d in formula %s") % (self.upoint,
                                                           self.index,
                                                           self.formula)


def _split_math_and_text(formula):
    """Split `formula` into chunks which alternate between math and text mode.

    The returned list always starts with a math-mode chunk; every chunk at an
    odd index is the brace group (braces included) following a `\\text...` or
    `\\mbox` command. Joining the chunks yields `formula` again.
    A ValueError from `get_matching_brace` is propagated if the braces of such
    a group are unbalanced."""
    chunks = []
    start = 0
    while True:
        # look for the *earliest* text-alike command, no matter which of the
        # two it is
        candidates = [pos for pos in (formula.find('\\text', start),
                                      formula.find('\\mbox', start))
                      if pos >= 0]
        if not candidates:
            break
        opening_brace = formula.find('{', min(candidates))
        if opening_brace < 0:
            # command without argument group (e.g. `\textstyle`), the rest is
            # treated as math mode
            break
        closing_brace = get_matching_brace(formula, opening_brace)
        # text before the text-alike command and the command itself
        chunks.append(formula[start:opening_brace])
        # text-mode argument, including its braces
        chunks.append(formula[opening_brace:closing_brace + 1])
        start = closing_brace + 1
    # remaining math-mode part (might be empty)
    chunks.append(formula[start:])
    return chunks


def escape_unicode_maths(formula, replace_alphabeticals=True):
    """This function uses the unicode table to replace any non-ascii character
    (identified with its unicode code point) with a LaTeX command. It also
    parses the formula for commands as e.g. \\text or \\mbox and applies
    text-mode commands within them.
    This allows the conversion of formulas with unicode maths with old-style
    LaTeX2e, which gleetex depends on.

    `replace_alphabeticals` is passed on to `replace_unicode_characters`.
    A DocumentSerializationException is raised if no replacement for a
    character is known; its index refers to the position within `formula`."""
    if not any(ord(ch) > 160 for ch in formula):
        return formula  # no umlauts, no replacement

    # characters in math mode need a different replacement than in text mode.
    # Therefore, the string has to be split into parts of math and text mode.
    replaced = []
    offset = 0  # position of the current chunk within formula
    is_math = True
    for chunk in _split_math_and_text(formula):
        try:
            replaced.append(replace_unicode_characters(
                chunk, is_math, replace_alphabeticals=replace_alphabeticals))
        except ValueError as e:  # unicode point missing
            # the reported index is relative to the chunk, translate it to a
            # position in the whole formula
            index = offset + int(e.args[0])
            raise DocumentSerializationException(
                formula, index, ord(formula[index])) from None
        offset += len(chunk)
        is_math = not is_math
    return ''.join(replaced)


def replace_unicode_characters(characters, is_math, replace_alphabeticals=True):
    """Replace all non-ascii characters within the given string with their LaTeX
    equivalent. The boolean is_math indicates, whether text-mode commands (like
    in \\text{}) or the amsmath equivalents should be used.
    When replace_alphabeticals is False, alphabetical characters will not be
    replaced through their LaTeX command, so that text within \\text{} (and
    similar) is not garbled. For instance, \\text{für} is replaced by
    \\text{f\\"{u}r} when replace_alphabeticals=True, but left untouched
    otherwise. Keeping the characters is useful for the alt attribute of an
    image, where the reader might want to read the normal text as such.
    This function raises a ValueError if no replacement for a unicode point can
    be found in the table. The first argument of the ValueError is the index
    within the string, where the unknown unicode character has been
    encountered."""
    text_mode = unicode.LaTeXMode.textmode
    mode = unicode.LaTeXMode.mathmode if is_math else text_mode
    result = []
    for idx, character in enumerate(characters):
        if ord(character) < 168:
            # ignore normal ascii character and unicode control sequences
            result.append(character)
        elif character.isalpha() and not replace_alphabeticals:
            # don't replace alphabeticals if specified, see doc string
            result.append(character)
        else:
            commands = unicode.unicode_table.get(ord(character))
            if not commands:  # unicode point missing in table
                # is caught one level above; provide index for more concise
                # error output
                raise ValueError(idx)
            if mode in commands:
                replacement = commands[mode]
                result.append(replacement)
                # if the command ends on a letter and the next character is
                # alphabetical, a space is required to separate them
                if replacement[-1:].isalpha() \
                        and characters[idx + 1:idx + 2].isalpha():
                    result.append(' ')
            elif is_math and text_mode in commands:
                # math mode and only a text alternative exists, add \\text{}
                # around it
                result.append('\\text{%s}' % commands[text_mode])
            else:
                # no replacement usable in the requested mode; report it the
                # documented way instead of leaking a KeyError
                raise ValueError(idx)
    return ''.join(result)


def get_matching_brace(string, pos_of_opening_brace):
    """Return the index of the closing brace which matches the opening brace
    at `pos_of_opening_brace` in `string`. Nested brace pairs are skipped.
    A ValueError is raised if there is no opening brace at the given position
    or if the braces are unbalanced."""
    if string[pos_of_opening_brace] != '{':
        raise ValueError("index %s in string %s: not a opening brace" % \
                (pos_of_opening_brace, repr(string)))
    depth = 1
    for index, ch in enumerate(string[pos_of_opening_brace + 1:]):
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return pos_of_opening_brace + index + 1
    # end of string reached without closing the initial brace
    raise ValueError("Unbalanced braces in formula " + repr(string))


#pylint: disable=too-many-instance-attributes
class LaTeXDocument:
    """This class represents a LaTeX document. It is intended to contain an
    equation as main content and properties to customize it. Its main purpose is
    to provide a str method which will serialize it to a full LaTeX document.
    """

    def __init__(self, eqn):
        self.__encoding = None
        self.__equation = eqn
        self.__displaymath = False
        self.__fontsize = 12
        self.__background_color = None
        self.__foreground_color = None
        self._preamble = ''
        self.__maths_env = None
        self.__replace_nonascii = False

    def _parse_color(self, color):
        """Return `color` as integer if it is a hexadecimal number, otherwise
        return it unchanged, so that it is treated as a colour name. None is
        returned as is and means "no colour set"."""
        if color is None:
            return None
        try:  # hex number?
            return int(color, 16)
        except ValueError:
            return color  # treat as normal dvips compatible colour name

    @staticmethod
    def _is_color_set(color):
        """Return whether `color` is an actual colour. The integer 0 (black,
        parsed from e.g. `000000`) is a colour, None and '' are not."""
        return color is not None and color != ''

    def set_background_color(self, color):
        """Set the background color. The `color` can be either a valid dvips
        name or a hexadecimal RGB string like `FF0000`. If unset, the image will
        be transparent."""
        self.__background_color = self._parse_color(color)

    def set_foreground_color(self, color):
        """Set the foreground color. The `color` can be either a valid dvips
        name or a hexadecimal RGB string like `FF0000`. If unset, the text will
        be black."""
        self.__foreground_color = self._parse_color(color)

    def set_replace_nonascii(self, flag):
        """If True, all non-ascii character will be replaced through a LaTeX
        command."""
        self.__replace_nonascii = flag

    def set_latex_environment(self, env):
        """Set maths environment name like `displaymath` or `flalign*`."""
        self.__maths_env = env

    def get_latex_environment(self):
        """Return the maths environment name (or None)."""
        return self.__maths_env

    def get_encoding(self):
        """Return encoding for the document (or None)."""
        return self.__encoding

    def set_preamble_string(self, p):
        """Set the string to add to the preamble of the LaTeX document."""
        self._preamble = p

    def set_encoding(self, encoding):
        """Set the encoding as used by the inputenc package. Names starting
        with `utf` and containing an 8 are mapped to `utf8`, `latin1` and
        ISO-8859 names to `latin1`; anything else raises a ValueError."""
        lowered = encoding.lower()
        if lowered.startswith('utf') and '8' in encoding:
            self.__encoding = 'utf8'
        elif (lowered.startswith('iso') and '8859' in encoding) or \
                lowered == 'latin1':
            self.__encoding = 'latin1'
        else:
            # if you plan to add an encoding, you have to adjust the str
            # function, which also loads the fontenc package
            raise ValueError(("Encoding %s is not supported at the moment. If "
                    "you want to use LaTeX 2e, you should report a bug at the home "
                    "page of GladTeX.") % encoding)

    def set_displaymath(self, flag):
        """Set whether the formula is set in displaymath. A TypeError is raised
        if `flag` is not a bool."""
        if not isinstance(flag, bool):
            raise TypeError("Displaymath parameter must be of type bool.")
        self.__displaymath = flag

    def is_displaymath(self):
        """Return whether the formula is set in displaymath."""
        return self.__displaymath

    def _get_encoding_preamble(self):
        """Return the fontenc part of the preamble; it is empty if no encoding
        has been set.
        A ValueError is raised if the formula contains non-ascii characters
        which are not going to be replaced and no encoding has been set or if
        the language of the system locale is not known to work with T1."""
        # first check whether there are umlauts within the formula and if so, an
        # encoding has been set
        has_nonascii = any(ord(ch) > 128 for ch in self.__equation)
        if has_nonascii and not self.__replace_nonascii \
                and not self.__encoding:
            raise ValueError(("No encoding set, but non-ascii characters "
                    "present. Please specify an encoding."))
        if not self.__encoding:
            return ''
        # try to guess language and hence character set (fontenc)
        # NOTE(review): locale.getdefaultlocale is deprecated since Python 3.11
        locale_name = locale.getdefaultlocale()[0]
        # extract just the language code, fall back to English
        language = (locale_name.split('_')[0] if locale_name else '') or 'en'
        # check whether language on computer is within T1 and hence whether
        # it should be loaded; I know that this can be a misleading
        # assumption, but there's no better way that I know of
        if language not in ['fr', 'es', 'it', 'de', 'nl', 'ro', 'en']:
            raise ValueError(("Language not supported by T1 fontenc "
                    "encoding; please report this to the GladTeX project."))
        return '\n\\usepackage[T1]{fontenc}'

    def set_fontsize(self, size_in_pt):
        """Set fontsize in pt, 12 pt by default."""
        self.__fontsize = size_in_pt

    def get_fontsize(self, size_in_pt=None):
        """Return the fontsize in pt. The parameter is ignored and only kept
        for backwards compatibility."""
        return self.__fontsize

    def __str__(self):
        """Serialize this object to a complete LaTeX document."""
        preamble = self._get_encoding_preamble() + \
                ('\n\\usepackage[utf8]{inputenc}\n\\usepackage{amsmath, amssymb}'
                '\n') + (self._preamble if self._preamble else '')
        return self._format_document(preamble)

    def _format_color_definition(self, which):
        """Return the \\definecolor command for the colour `which` (either
        'background' or 'foreground'). An empty string is returned if the colour
        is unset or a colour name, which needs no definition."""
        color = {'background': self.__background_color,
                 'foreground': self.__foreground_color}[which]
        if not isinstance(color, int):  # unset or a named colour
            return ''
        return '\\definecolor{%s}{HTML}{%s}' % (which, '%06X' % color)

    def _format_colors(self):
        """Return a tuple with the colour definitions for the preamble and the
        commands for the document body which activate the colours."""
        color_defs = (self._format_color_definition('background'),
                      self._format_color_definition('foreground'))
        color_body = ''
        if self._is_color_set(self.__background_color):
            color_body += ('\\pagecolor{%s}' % ('background' if color_defs[0]
                                                else self.__background_color))
        if self._is_color_set(self.__foreground_color):
            # opening brace isn't required here, inserted automatically
            color_body += ('\\color{%s}' % ('foreground' if color_defs[1]
                                            else self.__foreground_color))
        return (''.join(color_defs), color_body)

    def _format_document(self, preamble):
        """Return a formatted LaTeX document with the specified formula
        embedded."""
        # determine the markup with which to surround the formula
        if self.__maths_env:
            opening = '\\begin{%s}' % self.__maths_env
            closing = '\\end{%s}' % self.__maths_env
        elif self.__displaymath:
            opening, closing = '\\[', '\\]'
        else:
            opening, closing = '\\(', '\\)'
        formula = self.__equation.strip()
        if self.__replace_nonascii:
            formula = escape_unicode_maths(formula, replace_alphabeticals=True)
        fontsize = 'fontsize=%ipt' % self.__fontsize
        color_preamble, color_body = self._format_colors()
        return ("\\documentclass[%s, fleqn]{scrartcl}\n\n%s\n"
                "\\usepackage[dvipsnames]{xcolor}\n"
                "%s\n"  # color definitions, if applicable
                "\\usepackage[active,textmath,displaymath,tightpage]{preview} "
                "%% must be last one, see doc\n\n\\begin{document}\n"
                "\\noindent%%\n"
                "\\begin{preview}{%s"
                "%s%s%s}\\end{preview}\n"
                "\\end{document}\n") % (fontsize, preamble, color_preamble,
                                        color_body, opening, formula, closing)


def _remove_formatting_command(formula, command):
    """Replace every removable occurrence of `command` in `formula` by a space.
    Return a tuple of the resulting formula and a boolean which tells whether
    anything has been replaced."""
    changed = False
    search_from = 0
    while True:
        idx = formula.find(command, search_from)
        if idx < 0:
            return formula, changed
        end = idx + len(command)
        # don't touch the command if it's after a \\ ...
        after_backslash = idx > 0 and formula[idx - 1] == '\\'
        # ... or if it is just the beginning of a longer command name
        part_of_longer_command = command[-1].isalpha() \
                and end < len(formula) and formula[end].isalpha()
        if after_backslash or part_of_longer_command:
            search_from = idx + 1  # keep looking behind this occurrence
            continue
        formula = (formula[:idx] + ' ' + formula[end:]).replace('  ', ' ')
        changed = True
        # positions have shifted, start over; this terminates, because each
        # replacement shortens the formula
        search_from = 0


def increase_readability(formula, replace_nonascii=False):
    """In alternate texts for non-image users or those using a screen reader,
    the LaTeX code should be as readable as possible. Therefore the formula
    should not contain unicode characters or formatting instructions.
    All commands from FORMATTING_COMMANDS are replaced by a space, unless they
    follow a backslash or are the prefix of a longer command. If
    `replace_nonascii` is set, non-ascii characters except for alphabetical ones
    are replaced by their LaTeX command."""
    if replace_nonascii:
        # keep umlauts, etc; makes the alt more readable, yet wouldn't compile
        formula = escape_unicode_maths(formula, replace_alphabeticals=False)
    # replace formatting-only symbols which distract the reader; repeat until
    # nothing changes, since a removal may uncover a new occurrence
    formula_changed = True
    while formula_changed:
        formula_changed = False
        for command in FORMATTING_COMMANDS:
            formula, removed = _remove_formatting_command(formula, command)
            formula_changed = formula_changed or removed
    return formula