# This file is fontenc2mtp.py
#
# (c) 2004. Javier Bezos. License: LPPL.
#
# This file creates mtp files for several
# font encodings.
#
# Very, very quick and dirty, because currently
# I'm still not sure how to carry out the Unicode
# to font transformation. Due to an otp2ocp error,
# {botaccent}<0,> is removed and therefore complex
# composite characters are not recomposed properly.

import unicodedata

try:
    unichr
except NameError:
    # Python 3 has no unichr(); chr() is its equivalent.
    unichr = chr


def _canonical_mapping(code):
    """Return the canonical decomposition mapping of `code` as code points.

    The list is empty when the character has no decomposition, or only a
    compatibility one (unicodedata tags those with a '<...>' prefix).
    """
    mapping = unicodedata.decomposition(unichr(code))
    if '<' in mapping:
        return []
    return [int(x, 16) for x in mapping.split()]


def _expand(code):
    """Return `code` canonically decomposed as far as possible.

    A code point without canonical decomposition expands to itself.
    """
    parts = _canonical_mapping(code)
    if not parts:
        return [code]
    expanded = []
    for part in parts:
        expanded += _expand(part)
    return expanded


class Unichar:
    """A Unicode character together with its canonical decomposition.

    Attributes set by the constructor:
      code -- the code point, as an integer
      char -- the character, as a one-character unicode string
      name -- the Unicode name; NOT set at all if the character is unnamed
      dec  -- list of code points of the canonical decomposition, empty if
              the character has none (or only a compatibility one)
    """

    def __init__(self, char):
        """Build from a one-character unicode string or an integer code."""
        if isinstance(char, type(u'')):
            self.code = ord(char)
        else:
            self.code = char
        self.char = unichr(self.code)
        try:
            self.name = unicodedata.name(self.char)
        except ValueError:
            # Unnamed character: the attribute is left unset, as before.
            pass
        # The decomposition is followed recursively, so composites nested
        # more than two levels deep (e.g. U+1F84) are decomposed completely.
        self.dec = []
        for part in _canonical_mapping(self.code):
            self.dec += _expand(part)

    def decList(self):
        """Return the Unicode names of the decomposition, in order.

        Raises ValueError if some component has no Unicode name.
        """
        return [unicodedata.name(unichr(x)) for x in self.dec]

    def decString(self):
        """Return the decomposition as '[NAME][NAME]...' ('[]' if none)."""
        return '[%s]' % ']['.join(self.decList())

    def recList(self):
        """Return the names to recompose from.

        Same as decList(), except that a decomposition with a single
        component yields an empty list.
        """
        if len(self.dec) == 1:
            return []
        return self.decList()

    def recString(self):
        """Return recList() as '[NAME][NAME]...', or '' if it is empty."""
        # This used to call decList(), which made the single-component
        # case of recList() unreachable from here.
        names = self.recList()
        if not names:
            return ''
        return '[%s]' % ']['.join(names)


def printchr(u, botaccent=0):
    """Return '[NAME]' for the character whose hexadecimal code is `u`.

    If `botaccent` is true and the character has combining class 230,
    the result is prefixed with '{botaccent}<0,>'.
    Raises ValueError if the character has no Unicode name.
    """
    result = ''
    c = int(u, 16)
    if botaccent and unicodedata.combining(unichr(c)) == 230:
        result = '{botaccent}<0,>'
    result += '[%s]' % unicodedata.name(unichr(c))
    return result


def decnames(i):
    """Return the one-level canonical decomposition of code point `i`.

    The result is a string of '[NAME]' items; it is empty if there is no
    decomposition or only a compatibility one.
    """
    dec = unicodedata.decomposition(unichr(i))
    if '<' in dec:
        dec = ''
    return ''.join([printchr(x) for x in dec.split()])


# underaccents
# COMBINING x -> COMBINING x BELOW
#
ua = ['GRAVE ACCENT', 'ACUTE ACCENT', 'MACRON', 'DIAERESIS',
      'CARON', 'CIRCUMFLEX ACCENT', 'BREVE',
      'TILDE']
# COMBINING x ABOVE -> COMBINING x BELOW
uax = ['DOT', 'RING']

# undersigns
# COMBINING x BELOW => \UseMemAccent{p}{x}

ug = {'COMMA' : ',', 'DOT' : '.'}

# special

us = {'CEDILLA' : 'c', 'OGONEK' : 'k'}


def _load_mmap(path, tag, encmap, sym):
    """Merge the entries of the .mmap file `path` into `encmap` and `sym`.

    Each usable line has at least three whitespace-separated fields; the
    first is stored as the value and the second as the key of `encmap`.
    `sym` gets `tag` for every key read. Other lines are skipped.
    """
    with open(path) as mmap:
        for line in mmap:
            try:
                enc, uni, dummy = line.split(None, 2)
                # NOTE(review): eval() runs whatever the .mmap file
                # contains. Kept because the numeric syntax of those files
                # is not visible here; only use trusted .mmap files.
                code = eval(uni)
                encmap[code] = eval(enc)
                sym[code] = tag
            except Exception:
                # Best effort, as before: anything unparsable is ignored.
                pass


def _pad_lhs(d):
    """Pad the left-hand side `d` of a rule to the common column."""
    if len(d) > 40:
        return d + ' '
    return d.ljust(40)


def _recompose_rule(d, n):
    """Return the rule recomposing the sequence `d` into the character `n`."""
    if '<' in d:
        tail = '\\(*+1-1)'
    else:
        tail = ''
    return '%s => <= %s %s ;\n' % (_pad_lhs(d), n, tail)


def _format_run(first, last):
    """Return a run of code points in mtp syntax, as a single or a range."""
    if first == last:
        return '@"%04X' % first
    return '@"%04X-@"%04X' % (first, last)


def makemtp(infile, outfile, scriptranges, gx = ''):
    """Write the three mtp files for one font encoding.

    infile       -- base name of the encoding map ('<infile>.mmap'), read
                    together with 'ts1.mmap'; None means that every
                    character in the ranges maps to itself
    outfile      -- prefix of the files written: '<outfile>-com.mtp',
                    '<outfile>-rec.mtp' and '<outfile>-dec.mtp'
    scriptranges -- list of [first, last] code point pairs (both inclusive)
                    to cover; three fixed ranges are always added
    gx           -- extra expressions, written first in the -com file

    The list passed as `scriptranges` is not modified.
    """
    sym = {}      # code point -> symbol font tag ('' means main font)
    encmap = {}   # code point -> slot in the font
    recout = []
    mapout = []
    symout = []
    decout = []
    cmbout = []
    runs = []
    # Work on a copy: extending the argument in place would leak the extra
    # ranges into the caller's list.
    scriptranges = list(scriptranges) + [[0xA1, 0xBF], [0x300, 0x36F],
                                         [0x2000, 0x206F]]
    if infile is None:
        for b, e in scriptranges:
            # Ranges are inclusive (see the membership test below), so the
            # last code point has to be mapped as well.
            for i in range(b, e + 1):
                encmap[i] = i
                sym[i] = ''
    else:
        # The main encoding is read last, so it overrides ts1.
        _load_mmap('ts1.mmap', 'ts1', encmap, sym)
        _load_mmap('%s.mmap' % infile, '', encmap, sym)
    # Current run of characters whose slot equals their code point.
    run_begin = run_end = 0x20
    for i in range(0, 65535):
        in_ranges = any(lo <= i <= hi for lo, hi in scriptranges)
        if not (in_ranges or i in encmap):
            continue
        char = unichr(i)
        try:
            n = '[%s]' % unicodedata.name(char)
        except ValueError:
            n = '@"%04X' % i
        d = decnames(i)
        # Composite if the decomposition has more than one item.
        iscomp = bool(d) and '[' in d[1:]
        if not d:
            d = n
        if i in encmap:
            slot = encmap[i]
            cc = unicodedata.combining(char)
            if slot == i and not sym[i] and not cc:
                if run_end == i - 1:
                    run_end = i
                else:
                    runs.append(_format_run(run_begin, run_end))
                    run_begin = run_end = i
                if iscomp:
                    recout.append(_recompose_rule(d, n))
            elif sym[i]:
                n = '[%s]' % unicodedata.name(char)
                symout.append('%s => "\\UseMemTextSymbol{%s}{%d}";\n'
                              % (n.ljust(30), sym[i].upper(), slot))
            elif cc == 230:
                cmbout.append('%s => "\\UseMemAccent{t}{%d}";\n' % (n, slot))
                for uae in ua:
                    if n == '[COMBINING %s]' % uae:
                        cmbout.append('%s => "\\UseMemAccent{u}{%d}";\n'
                                      % ('[COMBINING %s BELOW]' % uae, slot))
                for uaxe in uax:
                    if n == '[COMBINING %s ABOVE]' % uaxe:
                        cmbout.append('%s => "\\UseMemAccent{u}{%d}";\n'
                                      % ('[COMBINING %s BELOW]' % uaxe, slot))
            elif cc == 220:
                cmbout.append('%s => "\\UseMemAccent{b}{%d}";\n' % (n, slot))
            elif cc == 202:
                for use, ust in us.items():
                    if n == '[COMBINING %s]' % use:
                        cmbout.append('%s => "\\UseMemAccent{%s}{%d}";\n'
                                      % (n, ust, slot))
            elif iscomp:
                recout.append(_recompose_rule(d, n))
                if i != slot:
                    mapout.append('%s => @"%02X ;\n' % (n, slot))
            # Characters below 20 are very often active, ignored or
            # invalid. However, as a ocp font is concerned they are
            # valid. We move them to the second page of the PUA and
            # they will be restored in the _last_ step with \char.
            elif slot < 0x20:
                mapout.append('%s => @"%04X ;\n' % (_pad_lhs(d), slot + 0xe100))
            else:
                mapout.append('%s => @"%04X ;\n' % (_pad_lhs(d), slot))
        if _canonical_mapping(i):
            decout.append('%s\n => <= %s;\n' % (n, Unichar(i).decString()))
    runs.append(_format_run(run_begin, run_end))
    sameout = '|'.join(runs)
    for uge, ugt in ug.items():
        cmbout.append('%s => "\\UseMemAccent{p}{%s}";\n'
                      % ('[COMBINING %s BELOW]' % uge, ugt))
    recout = ''.join(recout)
    cmbout = ''.join(cmbout)

    with open('%s-com.mtp' % outfile, 'w') as fo:
        fo.write('% (c) 2001-2004 Javier Bezos\n\n'
                 'input: 2;\n'
                 'output: 1;\n\n'
                 'states: acc, end;\n\n'
                 'aliases:\n\n'
                 'topaccent = (@"0300-@"0315 | @"031A-@"031B | @"033D-@"0344);\n'
                 'botaccent = (@"0316-@"0319 | @"031C-@"0333 | @"0339-@"033C | @"0345);\n'
                 'overaccent = (@"0334-@"0338);\n'
                 'accent = (@"0300-@"0345);\n\n'
                 'expressions:\n\n')
        if gx:
            fo.write(gx)
        fo.write('\n\n% Combine if composed form not in main font\n\n')
        fo.write('^({accent}) {topaccent} {topaccent} {accent} => <=\n'
                 ' \\4 "c{" \\3 "c{" \\2 "l{" \\1 "}}}" @"1B ;\n'
                 '^({accent}) {topaccent} {botaccent} {topaccent} => <=\n'
                 ' \\3 "c{" \\4 "c{" \\2 "l{" \\1 "}}}" @"1B ;\n'
                 '^({accent}) {botaccent} {topaccent} {topaccent} => <=\n'
                 ' \\2 "c{" \\4 "c{" \\3 "l{" \\1 "}}}" @"1B ;\n'
                 '^({accent}) {topaccent} {accent} => <=\n'
                 ' \\3 "c{" \\2 "l{" \\1 "}}" @"1B ;\n'
                 '^({accent}) {botaccent} {topaccent} => <=\n'
                 ' \\2 "c{" \\3 "l{" \\1 "}}" @"1B ;\n'
                 '^({accent}) {accent} => <=\n'
                 ' \\2 "l{" \\1 "}" @"1B ;\n\n'
                 ' @"1B => ;\n')
        fo.write(cmbout)
        fo.write('\n\n% PUA -> ASCII\n\n'
                 '@"E125 => "\\MemMoveOtherChar\\%"; % as a special case (37).\n'
                 '(@"E100-@"E17F) => "\\MemMoveOtherChar\\" #(\\1 - @"E100);\n\n')
        fo.write('\n\n% Glyphs in main font with same value as Unicode characters\n\n')
        fo.write('(%s) => \\1;' % sameout)

    with open('%s-rec.mtp' % outfile, 'w') as fo:
        fo.write('% (c) 2001-2004 Javier Bezos\n\n'
                 'input: 2;\n'
                 'output: 2;\n\n'
                 'states: acc;\n\n'
                 'aliases:\n\n'
                 'topaccent = (@"0300-@"0315 | @"031A-@"031B | @"033D-@"0344);\n'
                 'botaccent = (@"0316-@"0319 | @"031C-@"0333 | @"0339-@"033C | @"0345);\n'
                 'overaccent = (@"0334-@"0338);\n'
                 'accent = (@"0300-@"0345);\n\n'
                 'expressions:\n\n')
        fo.write('\n\n% Recompose available symbols\n\n')
        if not (recout or cmbout):
            recout = '. => \\1 ;'
        fo.write(recout)
        fo.write('\n\n% Unicode to main font, except those below\n\n')
        fo.write('\n')
        fo.write(''.join(mapout))
        fo.write('\n\n% Symbols not in main font. Use Symbol fonts\n\n')
        fo.write(''.join(symout))
        fo.write('\n\n% Spaces and control symbols\n\n')
        fo.write('[NO-BREAK SPACE] => "\\nobreakspace ";\n'
                 '[SOFT HYPHEN] => "\\-";\n'
                 '[EN QUAD] => "\\enskip ";\n'
                 '[EM QUAD] => "\\quad ";\n'
                 '[EN SPACE] => "\\enspace ";\n'
                 '[EM SPACE] => "\\quad ";\n'
                 '[THREE-PER-EM SPACE] => "\\kern.333em ";\n'
                 '[FOUR-PER-EM SPACE] => "\\kern.25em ";\n'
                 '[SIX-PER-EM SPACE] => "\\thinspace ";\n'
                 '[FIGURE SPACE] => "\\kern\\fontcharwd\\font`0 ";\n'
                 '[PUNCTUATION SPACE] => "\\kern\\fontcharwd\\font`. ";\n'
                 '[THIN SPACE] => "\\kern.2em ";\n'
                 '[HAIR SPACE] => "\\kern1pt ";\n'
                 '[ZERO WIDTH SPACE] => "\\kern0pt plus\\fontdimen3\\font minus \\fontdimen4\\font ";\n'
                 '[LINE SEPARATOR] => "\\\\";\n'
                 '[PARAGRAPH SEPARATOR] => "\\par ";\n')
        fo.write('\n\n% Accents are passed to the nest step\n\n')
        fo.write('\n\n{accent} => \\1;')
        fo.write('\n\n% Glyphs in main font with same value as Unicode characters\n'
                 '% and PUA\n')
        fo.write('(%s|@"E100-@"E17F) => \\1;' % sameout)
        fo.write('\n\n% Other characters, raise error\n\n')
        fo.write('. => "\\MemUnknownCharacter{" \\1 "}";')

    with open('%s-dec.mtp' % outfile, 'w') as fo:
        fo.write('% (c) 2001-2004 Javier Bezos\n\n'
                 'input: 2;\n'
                 'output: 2;\n\n'
                 'expressions:\n\n')
        fo.write('\n\n% Decompose\n\n')
        fo.write(''.join(decout))


# Extra expressions for the lgr encoding, passed to makemtp() as `gx`.
tmp = r"""
. [COMBINING COMMA ABOVE][COMBINING ACUTE ACCENT] => ">'" \1;
. [COMBINING COMMA ABOVE][COMBINING GRAVE ACCENT] => ">`" \1;
. [COMBINING COMMA ABOVE][COMBINING GREEK PERISPOMENI] => ">=" \1;
. [COMBINING REVERSED COMMA ABOVE][COMBINING ACUTE ACCENT] => "<'" \1;
. [COMBINING REVERSED COMMA ABOVE][COMBINING GRAVE ACCENT] => "<`" \1;
. [COMBINING REVERSED COMMA ABOVE][COMBINING GREEK PERISPOMENI] => @"40 \1;
. [COMBINING COMMA ABOVE]=> ">" \1;
. [COMBINING REVERSED COMMA ABOVE] => "<" \1;
. [COMBINING ACUTE ACCENT] => "'" \1;
. [COMBINING GRAVE ACCENT] => "`" \1;
. [COMBINING GREEK PERISPOMENI] => "=" \1;
"""


def main():
    """Write the mtp files of every encoding handled by this script.

    Needs ts1.mmap, t1.mmap, ot1.mmap, t2a.mmap and lgr.mmap in the
    current directory; the output goes to the current directory too.
    """
    makemtp('t1', 't1', [[0x21, 0x2FF], [0x1E00, 0x1EFF]])
    makemtp('ot1', 'ot1', [[0x21, 0x2FF], [0x1E00, 0x1EFF]])
    makemtp('t2a', 't2a', [[0x400, 0x52F]])
    makemtp('lgr', 'lgr', [[0x370, 0x3FF], [0x1F00, 0x1FFF]], gx = tmp)
    makemtp(None, 'ula', [[0x21, 0x2FF], [0x1E00, 0x1EFF]])
    makemtp(None, 'ucy', [[0x21, 0x7F], [0x400, 0x52F]])
    makemtp(None, 'uel', [[0x21, 0x7F], [0x370, 0x3FF], [0x1F00, 0x1FFF]])


# Guarded so that the module can be imported without writing any file;
# running it as a script works as before.
if __name__ == '__main__':
    main()