Source code for pyenzyme.enzymeml.tools.unitparser

# File: unitparser.py
# Project: tools
# Author: Jan Range
# License: BSD-2 clause
# Copyright (c) 2022 Institute of Biochemistry and Technical Biochemistry Stuttgart

import re


class UnitParser(object):
    """Parse free-form unit strings into ``(prefix, unit, exponent)`` tuples.

    A unit string such as ``"mmole / l"`` is first normalised to an
    "exponent string" (``"mmole+1l-1"``) in which every unit carries an
    explicitly signed exponent. Each unit is then split into an optional
    one-letter SI prefix and a supported base unit.
    """

    # Pattern for "<optional one-letter prefix><base unit>". It is kept in
    # this exact textual form because __getSupportUnitString() derives the
    # list of supported prefixes/units for error messages from the text.
    _UNIT_REGEX = (
        "^([a|f|p|n|u|m|c|d|k]?)"
        "(C|celsius|K|kelvin|M|molar|mole|g|gram|l|L|litre|liter|[s]?|sec|seconds"
        "|second|min|mins|minutes|h|hour|hours|dimensionless)$"
    )

    def __init__(self):
        # Long-form prefix names mapped to their one-letter symbol
        # ("mili" is accepted as a common misspelling of "milli").
        self.__prefixDict = {
            "femto": "f",
            "pico": "p",
            "nano": "n",
            "micro": "u",
            "milli": "m",
            "mili": "m",
            "centi": "c",
            "deci": "d",
            "kilo": "k",
        }

    def parse(self, exp_string):
        """Parse a unit string into its components.

        Args:
            exp_string: Unit string, e.g. ``"mmole / l"`` or ``"s^-1"``.

        Returns:
            A list of ``(prefix, unit, exponent)`` tuples, one per unit.
            ``prefix`` is ``""`` when the unit has no prefix and ``exponent``
            is a signed string such as ``"+1"`` or ``"-2"``.

        Raises:
            KeyError: If a unit is not supported.
        """
        exp_string = self.__exponentString(exp_string)

        # Split the normalised string into (unit, signed exponent) pairs.
        unit_tup = re.findall(r"([a-zA-Z]*)([-+][\d]*)", exp_string)

        return [
            self.__getPrefix(unit, exponent) for unit, exponent in unit_tup if unit
        ]

    def getExponentString(self, string):
        """Return the normalised exponent string for ``string``.

        Args:
            string: Unit string, e.g. ``"mmole / l"``.

        Returns:
            The concatenated units with signed exponents, e.g.
            ``"mmole+1l-1"``. Whitespace around ``/`` is optional.
        """
        return self.__exponentString(string)

    def __reformatString(self, string, pre):
        """Attach a signed exponent to every unit token found in ``string``.

        Args:
            string: Text containing one or more unit tokens, each optionally
                followed by ``^`` and/or a signed or unsigned integer.
            pre: ``"+"`` for numerator tokens, ``"-"`` for denominator tokens.

        Returns:
            The concatenation of ``unit + sign + exponent`` for each token.
            A missing exponent defaults to ``1``; an explicit ``-`` inverts
            the sign implied by ``pre``.
        """
        inverted = "-" if pre == "+" else "+"

        pieces = []
        for unit, exponent in re.findall(r"(\w*)[|^]?([-+]?\d+)?", string):
            if not unit:
                # findall also yields empty matches between tokens.
                continue

            sign = pre
            if exponent[:1] in ("+", "-"):
                if exponent[0] == "-":
                    sign = inverted
                exponent = exponent[1:]

            pieces.append(unit + sign + (exponent or "1"))

        return "".join(pieces)

    def __exponentString(self, string):
        """Normalise a unit string to units with signed exponents.

        The text before the first ``/`` is the numerator; every following
        ``/``-separated segment is treated as a denominator, so
        ``"mole / l / s"`` yields ``"mole+1l-1s-1"``.
        """
        numerator, *denominators = [part.strip() for part in string.split("/")]

        pieces = [self.__reformatString(unit, "+") for unit in numerator.split(" ")]
        for segment in denominators:
            pieces.extend(
                self.__reformatString(unit, "-") for unit in segment.split(" ")
            )

        return "".join(pieces)

    def __getPrefix(self, string, exponent):
        """Split a single unit into prefix and base unit.

        Args:
            string: Non-empty unit token, e.g. ``"mmole"``.
            exponent: Signed exponent string, passed through unchanged.

        Returns:
            ``(prefix, unit, exponent)``; ``prefix`` is ``""`` if absent.

        Raises:
            KeyError: If ``string`` is not a supported unit.
        """
        regex = self._UNIT_REGEX

        # Lower-case everything except the last character so that
        # case-sensitive symbols such as "M", "L", "C" and "K" survive.
        string = string.lower()[0:-1] + string[-1]

        match = re.match(regex, string)
        if match is None:
            raise KeyError(
                f'Could not parse unit, because "{string}" is not supported. '
                "PyEnzyme currently supports the following "
                f"{self.__getSupportUnitString(regex)}"
            )

        prefix, unit = match.groups()
        if len(prefix) > 1:
            # NOTE(review): the pattern only admits one-letter prefixes, so
            # this long-name lookup is currently never taken.
            prefix = self.__prefixDict[prefix.lower()]

        return (prefix, unit, exponent)

    @staticmethod
    def __getSupportUnitString(regex):
        """Build the "supported prefixes and units" text for error messages.

        Args:
            regex: The unit pattern in the textual form of ``_UNIT_REGEX``.

        Returns:
            A string listing the prefixes and units named in ``regex``.
        """
        prefixes, units = tuple(regex.split(")("))
        units = units.replace("[s]?", "s").replace(")$", "").split("|")
        prefixes = prefixes.replace("^([", "").replace("]?", "").split("|")
        return f"prefixes [{', '.join(prefixes)}] and units [{', '.join(units)}]"