Source code for pyenzyme.enzymeml.tools.unitparser

# File: unitparser.py
# Project: tools
# Author: Jan Range
# License: BSD-2 clause
# Copyright (c) 2022 Institute of Biochemistry and Technical Biochemistry Stuttgart

import re


[docs]class UnitParser(object):
    def __init__(self):

        self.__prefixDict = {
            "femto": "f",
            "pico": "p",
            "nano": "n",
            "micro": "u",
            "milli": "m",
            "mili": "m",
            "centi": "c",
            "deci": "d",
            "kilo": "k",
        }

[docs]    def parse(self, exp_string):

        # reformat string
        exp_string = self.__exponentString(exp_string)

        # split by exponents
        regex = r"([a-zA-Z]*)([-+][\d]*)"
        regex = regex.replace(" ", "")

        unit_tup = re.findall(regex, exp_string)
        return [self.__getPrefix(tup[0], tup[-1]) for tup in unit_tup if tup[0]]

[docs]    def getExponentString(self, string):

        string = string.split("/")

        if len(string) == 2:
            nom = string[0].split(" ")[0:-1]
            den = string[1].split(" ")[1::]

        elif len(string) == 1:
            nom = string
            den = []

        return "".join(
            [self.__reformatString(unit, "+") for unit in nom]
            + [self.__reformatString(unit, "-") for unit in den]
        )

    def __reformatString(self, string, pre):
        regex = r"(\w*)[-+|\^]?(\d*)"
        groups = re.findall(regex, string)
        exp_string = ""

        for unit, exponent in groups:

            if len(unit) > 0:

                if len(exponent) == 1:
                    exp_string += unit + pre + exponent

                elif "+" in exponent or "-" in exponent:
                    exp_string += unit + exponent

                else:
                    exp_string += unit + "%s1" % pre

        return exp_string

    def __exponentString(self, string):

        string = [st.strip() for st in string.split("/")]

        if len(string) == 2:
            nom = string[0].split(" ")
            den = string[1].split(" ")

        elif len(string) == 1:
            nom = string
            den = []

        return "".join(
            [self.__reformatString(unit, "+") for unit in nom]
            + [self.__reformatString(unit, "-") for unit in den]
        )

    def __getPrefix(self, string, exponent):

        regex = "^([a|f|p|n|u|m|c|d|k]?)(C|celsius|K|kelvin|M|molar|mole|g|gram|l|L|litre|liter|[s]?|sec|seconds|second|min|mins|minutes|h|hour|hours|dimensionless)$"
        string = string.lower()[0:-1] + string[-1]

        try:
            prefix = re.findall(regex, string)[0][0]

            if len(prefix) > 1:
                prefix = self.__prefixDict[prefix.lower()]

            unit = re.findall(regex, string)[0][1]

            return (prefix, unit, exponent)

        except IndexError:

            try:
                unit = re.findall(regex, string)[0][0]
                return (None, unit, exponent)
            except IndexError:
                supportedUnits = regex.split()
                raise KeyError(
                    f'Could not parse unit, because "{string}" is not supported. PyEnzyme currently supports the following {self.__getSupportUnitString(regex)}'
                )

    @staticmethod
    def __getSupportUnitString(regex):
        prefixes, units = tuple(regex.split(")("))

        units = units.replace("[s]?", "s").replace(")$", "").split("|")
        prefixes = prefixes.replace("^([", "").replace("]?", "").split("|")

        return f"prefixes [{', '.join(prefixes)}] and units [{', '.join(units)}]"