| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511 |
- from xml.etree.cElementTree import Element
- from .utils import qn
- class OMMLParser:
- """
- Parser class for reading OMML and converting it into LaTeX.
- """
- FUNCTION_MAP = {
- "sin": "\\sin",
- "cos": "\\cos",
- "tan": "\\tan",
- "cot": "\\cot",
- "sec": "\\sec",
- "csc": "\\csc",
- "sinh": "\\sinh",
- "cosh": "\\cosh",
- "tanh": "\\tanh",
- "coth": "\\coth",
- "sech": "\\operatorname{sech}",
- "csch": "\\operatorname{csch}",
- "log": "\\log",
- "ln": "\\ln",
- "min": "\\min",
- "max": "\\max",
- "lim": "\\lim",
- }
- def _normalize_func_name(self, content: str) -> str:
- if not content:
- return content
- if content.startswith("\\"):
- return content
- key = content.strip()
- mapped = self.FUNCTION_MAP.get(key)
- return mapped if mapped else content
- def parse(self, root: Element) -> str:
- """
- Parses an m:oMath OMML tag into LaTeX.
- :param root: An m:oMath OMML tag
- :return: The LaTeX representation of the OMML input
- """
- text = ""
- try:
- if root.tag == qn("m:t"):
- return self.parse_t(root)
- for child in root:
- if child.tag in self.parsers:
- text += self.parsers[child.tag](self, child)
- except AttributeError:
- # In case of missing attributes on OMML tags,
- # we return an empty string (ref:issue_14)
- return ""
- return text
- def parse_e(self, root: Element) -> str:
- text = ""
- for child in root:
- text += self.parse(child)
- return text
- def parse_r(self, root: Element) -> str:
- # TODO: Add support for m:rPr and m:scr to support different character styles
- # For now, we just parse the text content of m:r
- text = ""
- for child in root:
- text += self.parse(child)
- return text
- def parse_t(self, root: Element):
- symbol_map = {
- "≜": "\\triangleq",
- "≝": "\\stackrel{\\tiny def}{=}",
- "≞": "\\stackrel{\\tiny m}{=}",
- }
- replacements = {
- "<": "\\lt ",
- ">": "\\gt ",
- "≤": "\\leq ",
- "≥": "\\geq ",
- "∞": "\\infty ",
- "<": "\\lt ",
- ">": "\\gt ",
- "≤": "\\leq ",
- "≥": "\\geq ",
- }
- text = root.text.split()
- if not text:
- return " "
- for i, t in enumerate(text):
- if t in symbol_map:
- text[i] = symbol_map[t]
- for key, value in replacements.items():
- for i, t in enumerate(text):
- text[i] = t.replace(key, value)
- return " ".join(text)
- def parse_acc(self, root: Element) -> str:
- character_map = {
- 768: "\\grave",
- 769: "\\acute",
- 770: "\\hat",
- 771: "\\tilde",
- 773: "\\bar",
- 774: "\\breve",
- 775: "\\dot",
- 776: "\\ddot",
- 780: "\\check",
- 831: "\\overline{\\overline",
- 8400: "\\overset\\leftharpoonup",
- 8401: "\\overset\\rightharpoonup",
- 8406: "\\overleftarrow",
- 8407: "\\overrightarrow",
- 8411: "\\dddot",
- 8417: "\\overset\\leftrightarrow",
- }
- text = ""
- accent = 770
- for child in root:
- if child.tag == qn("m:accPr"):
- for child2 in child:
- if child2.tag == qn("m:chr"):
- val = child2.attrib.get(qn("m:val"))
- if val:
- try:
- accent = ord(val)
- except TypeError:
- pass
- accent_cmd = character_map.get(accent)
- if accent_cmd is None:
- accent_cmd = character_map.get(770, "\\hat")
- text += accent_cmd + "{"
- for child in root:
- if child.tag == qn("m:e"):
- text += self.parse(child)
- text += "}"
- if accent == 831:
- text += "}"
- return text
- def parse_bar(self, root: Element) -> str:
- text = "\\overline{"
- for child in root:
- if child.tag == qn("m:barPr"):
- for child2 in child:
- if child2.tag == qn("m:pos"):
- if child2.attrib.get(qn("m:val")) == "bot":
- text = "\\underline{"
- for child in root:
- if child.tag == qn("m:e"):
- text += self.parse(child)
- text += "}"
- return text
- def parse_border_box(self, root: Element) -> str:
- text = "\\boxed{"
- for child in root:
- if child.tag == qn("m:e"):
- text += self.parse(child)
- text += "}"
- return text
- def parse_box(self, root: Element) -> str:
- text = ""
- for child in root:
- text += self.parse(child)
- return text
- def parse_group_chr(self, root: Element) -> str:
- character_map = {
- "←": "\\leftarrow",
- "→": "\\rightarrow",
- "↔": "\\leftrightarrow",
- "⇐": "\\Leftarrow",
- "⇒": "\\Rightarrow",
- "⇔": "\\Leftrightarrow",
- }
- text = "\\underbrace{"
- bottom = False
- for child in root:
- if child.tag == qn("m:groupChrPr"):
- for child2 in child:
- if child2.tag == qn("m:chr"):
- char = child2.attrib.get(qn("m:val"))
- if char in character_map:
- text = character_map[char]
- for child2 in child:
- if (
- child2.tag == qn("m:pos")
- and child2.attrib.get(qn("m:val")) == "top"
- ):
- # If m:pos is set to "top", the symbol is supposed to
- # be on top and the text is actually supposed to be under
- bottom = True
- content = ""
- for child in root:
- if child.tag == qn("m:e"):
- content = self.parse(child)
- if text == "\\underbrace{":
- if bottom:
- text = "\\overbrace{" + content + "}"
- else:
- text += content + "}"
- else:
- if not bottom:
- text = "\\overset{" + content + "}" + "{" + text + "}"
- else:
- text = "\\underset{" + content + "}" + "{" + text + "}"
- return text
- def parse_d(self, root: Element) -> str:
- bracket_map = {
- "(": "\\left(",
- ")": "\\right)",
- "[": "\\left[",
- "]": "\\right]",
- "{": "\\left{",
- "}": "\\right}",
- "〈": "\\left\\langle",
- "〉": "\\right\\rangle",
- "⟨": "\\left\\langle",
- "⟩": "\\right\\rangle",
- "⌊": "\\left\\lfloor",
- "⌋": "\\right\\rfloor",
- "⌈": "\\left\\lceil",
- "⌉": "\\right\\rceil",
- "|": "\\left|",
- "‖": "\\left\\|",
- "⟦": "[\\![",
- "⟧": "]\\!]",
- }
- text = ""
- start_bracket = "("
- end_bracket = ")"
- seperator = "|"
- is_matrix = False
- for child in root:
- for child2 in child:
- if child.tag == qn("m:dPr"):
- if child2.tag == qn("m:begChr"):
- start_bracket = child2.attrib.get(qn("m:val"))
- if child2.tag == qn("m:endChr"):
- end_bracket = child2.attrib.get(qn("m:val"))
- if child2.tag == qn("m:sepChr"):
- seperator = child2.attrib.get(qn("m:val"))
- if child2.tag == qn("m:m"):
- is_matrix = True
- for child in root:
- if child.tag == qn("m:e"):
- if text:
- text += seperator
- text += self.parse(child)
- end_bracket_replacements = {
- "|": "\\right|",
- "‖": "\\right\\|",
- "[": "\\right[",
- }
- start_bracket_replacements = {
- "]": "\\left]",
- }
- start = ""
- end = ""
- if start_bracket:
- if start_bracket in start_bracket_replacements:
- start = start_bracket_replacements[start_bracket] + " "
- elif start_bracket in bracket_map:
- start = bracket_map[start_bracket] + " "
- else:
- start = "\\left(" + " "
- if end_bracket:
- if end_bracket in end_bracket_replacements:
- end = " " + end_bracket_replacements[end_bracket]
- elif end_bracket in bracket_map:
- end = " " + bracket_map[end_bracket]
- else:
- end = " " + "\\right)"
- # If there is no end bracket and this tag contains an m:eqArr tag as a
- # child, we assume that the eqArr should be translated to a cases environment
- # instead of an eqnarray* environment.
- else:
- for child in root:
- if child.tag == qn("m:e"):
- for child2 in child:
- if child2.tag == qn("m:eqArr"):
- text = text.replace("\\begin{eqnarray*}", "")
- text = text.replace("\\end{eqnarray*}", "")
- return "\\begin{cases} " + text + " \\end{cases}"
- if is_matrix:
- if start_bracket == "(" and end_bracket == ")":
- return text.replace("{matrix}", "{pmatrix}")
- elif start_bracket == "|" and end_bracket == "|":
- return text.replace("{matrix}", "{vmatrix}")
- elif start_bracket == "‖" and end_bracket == "‖":
- return text.replace("{matrix}", "{Vmatrix}")
- else:
- return text.replace("{matrix}", "{bmatrix}")
- return start + text + end
- def parse_eq_arr(self, root: Element) -> str:
- text = "\\begin{eqnarray*}"
- for child in root:
- if child.tag == qn("m:e"):
- text += self.parse(child) + " \\\\"
- text += "\\end{eqnarray*}"
- return text
- def parse_f(self, root: Element) -> str:
- text = "\\frac{"
- num = ""
- den = ""
- is_binom = False
- for child in root:
- if child.tag == qn("m:fPr"):
- for child2 in child:
- if (
- child2.tag == qn("m:type")
- and child2.attrib.get(qn("m:val")) == "noBar"
- ):
- is_binom = True
- if child.tag == qn("m:num"):
- num = self.parse(child)
- if child.tag == qn("m:den"):
- den = self.parse(child)
- if is_binom:
- text = "\\genfrac{}{}{0pt}{}{"
- text += num + "}{" + den + "}"
- return text
- def parse_m(self, root: Element) -> str:
- text = "\\begin{matrix} "
- text += self.parse(root)[:-3] # Remove the last ' \\'
- text += "\\end{matrix}"
- return text
- def parse_mr(self, root: Element) -> str:
- text = ""
- for child in root:
- if child.tag == qn("m:e"):
- text += self.parse(child) + " & "
- return text[:-2] + "\\\\ " # Remove the last ' & '
- def parse_func(self, root: Element) -> str:
- subscript = ""
- superscript = ""
- text = ""
- func_name = "sin"
- for child in root:
- if child.tag == qn("m:fName"):
- for child2 in child:
- if child2.tag in [qn("m:sSup"), qn("m:sSub"), qn("m:r")]:
- for child3 in child2:
- if child3.tag == qn("m:sub"):
- subscript = self.parse(child3)
- if child3.tag == qn("m:sup"):
- superscript = self.parse(child3)
- if child3.tag == qn("m:t") or child3.tag == qn("m:e"):
- func_name = self.parse(child3)
- elif child2.tag == qn("m:limLow"):
- for child3 in child2:
- if child3.tag == qn("m:lim"):
- for child4 in child3:
- subscript += self.parse(child4)
- if child3.tag == qn("m:e"):
- func_name = self.parse(child3)
- if child.tag == qn("m:e"):
- text += self.parse(child)
- if func_name in ["lim", "max", "min"]:
- return f"\\{func_name}\\limits_{{{subscript}}}^{{{superscript}}}{{{text}}}"
- if func_name not in self.FUNCTION_MAP:
- return f"{{{func_name}}}^{{{superscript}}}_{{{subscript}}}{{{text}}}"
- return (
- self.FUNCTION_MAP[func_name]
- + f"_{{{subscript}}}^{{{superscript}}}{{{text}}}"
- )
- def parse_s_sup(self, root: Element) -> str:
- content = ""
- exp_content = ""
- for child in root:
- if child.tag == qn("m:e"):
- content = self.parse(child)
- if child.tag == qn("m:sup"):
- exp_content = self.parse(child)
- content = self._normalize_func_name(content)
- return f"{{{content}}}^{{{exp_content}}}"
- def parse_s_sub(self, root: Element) -> str:
- content = ""
- sub_content = ""
- for child in root:
- if child.tag == qn("m:e"):
- content = self.parse(child)
- if child.tag == qn("m:sub"):
- sub_content = self.parse(child)
- content = self._normalize_func_name(content)
- return f"{{{content}}}_{{{sub_content}}}"
- def parse_s_sub_sup(self, root: Element) -> str:
- content = ""
- sub_content = ""
- exp_content = ""
- for child in root:
- if child.tag == qn("m:e"):
- content = self.parse(child)
- if child.tag == qn("m:sub"):
- sub_content = self.parse(child)
- if child.tag == qn("m:sup"):
- exp_content = self.parse(child)
- content = self._normalize_func_name(content)
- return f"{{{content}}}_{{{sub_content}}}^{{{exp_content}}}"
- def parse_s_pre(self, root: Element) -> str:
- content = ""
- sub_content = ""
- exp_content = ""
- for child in root:
- if child.tag == qn("m:e"):
- content = self.parse(child)
- if child.tag == qn("m:sub"):
- sub_content = self.parse(child)
- if child.tag == qn("m:sup"):
- exp_content = self.parse(child)
- return "{}^{" + exp_content + "}_{" + sub_content + "}{" + content + "}"
- def parse_rad(self, root: Element) -> str:
- content = ""
- order = ""
- for child in root:
- if child.tag == qn("m:deg"):
- order = self.parse(child)
- if child.tag == qn("m:e"):
- content += self.parse(child)
- if order:
- return f"\\sqrt[{order}]{{{content}}}"
- return f"\\sqrt{{{content}}}"
- def parse_nary(self, root: Element) -> str:
- character_map = {
- 8719: "\\prod",
- 8720: "\\coprod",
- 8721: "\\sum",
- 8747: "\\int",
- 8748: "\\iint",
- 8749: "\\iiint",
- 8750: "\\oint",
- 8751: "\\oiint",
- 8752: "\\oiiint",
- 8896: "\\bigwedge",
- 8897: "\\bigvee",
- 8898: "\\bigcap",
- 8899: "\\bigcup",
- }
- char = 8747
- for child in root:
- if child.tag == qn("m:naryPr"):
- for child2 in child:
- if child2.tag == qn("m:chr"):
- val = child2.attrib.get(qn("m:val"))
- if val:
- try:
- char = ord(val)
- except TypeError:
- pass
- text = character_map.get(char, character_map[8721])
- sub = ""
- sup = ""
- content = ""
- for child in root:
- if child.tag == qn("m:sub"):
- sub = self.parse(child)
- if child.tag == qn("m:sup"):
- sup = self.parse(child)
- if child.tag == qn("m:e"):
- content = self.parse(child)
- if sub:
- text += f"_{{{sub}}}"
- if sup:
- text += f"^{{{sup}}}"
- text += "{" + content + "}"
- return text
- parsers = {
- qn("m:r"): parse_r,
- qn("m:acc"): parse_acc,
- qn("m:borderBox"): parse_border_box,
- qn("m:bar"): parse_bar,
- qn("m:box"): parse_box,
- qn("m:d"): parse_d,
- qn("m:e"): parse_e,
- qn("m:groupChr"): parse_group_chr,
- qn("m:f"): parse_f,
- qn("m:sSup"): parse_s_sup,
- qn("m:sSub"): parse_s_sub,
- qn("m:sSubSup"): parse_s_sub_sup,
- qn("m:sPre"): parse_s_pre,
- qn("m:t"): parse_t,
- qn("m:rad"): parse_rad,
- qn("m:nary"): parse_nary,
- qn("m:eqArr"): parse_eq_arr,
- qn("m:func"): parse_func,
- qn("m:m"): parse_m,
- qn("m:mr"): parse_mr,
- }
|