# File: enzymemldocument.py
# Project: core
# Author: Jan Range
# License: BSD-2 clause
# Copyright (c) 2022 Institute of Biochemistry and Technical Biochemistry Stuttgart
import os
import re
import ast
import json
import yaml
import sys
import logging
import pandas as pd
import seaborn as sns
import plotly.express as px
from pydantic import Field, PrivateAttr, validator, validate_arguments
from typing import Dict, List, Tuple, TYPE_CHECKING, Optional, Union
from dataclasses import dataclass
from io import StringIO
from pyenzyme.enzymeml.core.enzymemlbase import EnzymeMLBase
from pyenzyme.enzymeml.core.abstract_classes import AbstractSpecies
from pyenzyme.enzymeml.core.reactant import Reactant
from pyenzyme.enzymeml.core.creator import Creator
from pyenzyme.enzymeml.core.protein import Protein
from pyenzyme.enzymeml.core.complex import Complex
from pyenzyme.enzymeml.core.vessel import Vessel
from pyenzyme.enzymeml.core.unitdef import UnitDef
from pyenzyme.enzymeml.core.measurement import Measurement
from pyenzyme.enzymeml.core.measurementData import MeasurementData
from pyenzyme.enzymeml.core.enzymereaction import EnzymeReaction
from pyenzyme.enzymeml.models.kineticmodel import KineticParameter
from pyenzyme.enzymeml.tools.unitcreator import UnitCreator
from pyenzyme.enzymeml.tools.enzymemlwriter import EnzymeMLWriter
from pyenzyme.enzymeml.tools.templatereader import read_template
from pyenzyme.enzymeml.tools.validator import EnzymeMLValidator
from pyenzyme.enzymeml.core.ontology import EnzymeMLPart, SBOTerm
from pyenzyme.utils.log import setup_custom_logger, log_object
from pyenzyme.enzymeml.core.exceptions import SpeciesNotFoundError
from pyenzyme.enzymeml.core.utils import type_checking, deprecated_getter
if TYPE_CHECKING:  # pragma: no cover
    # Seen by static type checkers only: the class decorator is `dataclass`
    static_check_init_args = dataclass
else:
    # At runtime the project's own `type_checking` decorator is applied
    static_check_init_args = type_checking

# Initialize the logger (shared under the name "pyenzyme")
logger = logging.getLogger("pyenzyme")
[docs]@static_check_init_args
class EnzymeMLDocument(EnzymeMLBase):
# * General document metadata
name: str = Field(
    ...,
    description="Title of the EnzymeML Document.",
)
level: int = Field(
    3,
    description="SBML level of the EnzymeML XML.",
    inclusiveMinimum=1,
    inclusiveMaximum=3,
)
version: int = Field(
    2,
    description="SBML version of the EnzymeML XML.",
)
pubmedid: Optional[str] = Field(
    None,
    description="Pubmed ID reference.",
)
url: Optional[str] = Field(
    None,
    description="Arbitrary type of URL that is related to the EnzymeML document.",
)
doi: Optional[str] = Field(
    None,
    description="Digital Object Identifier of the referenced publication or the EnzymeML document.",
)
created: Optional[str] = Field(
    None,
    description="Date the EnzymeML document was created.",
)
modified: Optional[str] = Field(
    None,
    description="Date the EnzymeML document was modified.",
)
# * Content dictionaries, each keyed by the internal ID of the stored object
creator_dict: Dict[str, Creator] = Field(
    alias="creators",
    default_factory=dict,
    description="Dictionary mapping from creator IDs to creator describing objects.",
)
vessel_dict: Dict[str, Vessel] = Field(
    alias="vessels",
    default_factory=dict,
    description="Dictionary mapping from vessel IDs to vessel describing objects.",
)
protein_dict: Dict[str, Protein] = Field(
    alias="proteins",
    default_factory=dict,
    description="Dictionary mapping from protein IDs to protein describing objects.",
)
complex_dict: Dict[str, Complex] = Field(
    alias="complexes",
    default_factory=dict,
    description="Dictionary mapping from complex IDs to complex describing objects.",
)
reactant_dict: Dict[str, Reactant] = Field(
    alias="reactants",
    default_factory=dict,
    description="Dictionary mapping from reactant IDs to reactant describing objects.",
)
reaction_dict: Dict[str, EnzymeReaction] = Field(
    alias="reactions",
    default_factory=dict,
    description="Dictionary mapping from reaction IDs to reaction describing objects.",
)
measurement_dict: Dict[str, Measurement] = Field(
    alias="measurements",
    default_factory=dict,
    description="Dictionary mapping from measurement IDs to measurement describing objects.",
)
file_dict: Dict[str, dict] = Field(
    alias="files",
    default_factory=dict,
    description="Dictionary mapping from file IDs to file describing dictionaries.",
)
global_parameters: Dict[str, KineticParameter] = Field(
    alias="global_parameters",
    default_factory=dict,
    description="Dictionary mapping from parameter IDs to global kinetic parameter describing objects.",
)
# Log content of the document; passed through the "log" validator on creation
log: str = Field(
    default="",
)
# * Private attributes
# Unit definitions of the document, keyed by unit ID
_unit_dict: Dict[str, UnitDef] = PrivateAttr(default_factory=dict)
# ! Validators
[docs] @validator("log")
def start_logger(cls, logs: str, values: dict):
    """Starts a logger instance for the document.

    Args:
        logs (str): Already existing log content that seeds the log stream.
        values (dict): Previously validated field values (unused here).

    Returns:
        StringIO: The stream the "pyenzyme" logger has been set up with.
    """
    # Seed an in-memory stream with the given log content
    stream = StringIO()
    stream.write(logs)
    # Hand the stream over to the global logger
    setup_custom_logger("pyenzyme", stream)
    return stream
[docs] @validator("pubmedid")
def add_identifier(cls, pubmedid: Optional[str]):
    """Prefixes the PubMed ID with its identifiers.org link unless already present.

    Args:
        pubmedid (Optional[str]): Raw PubMed ID, complete identifiers.org link or None.

    Returns:
        Optional[str]: None if no ID was given, otherwise the complete link.
    """
    prefix = "https://identifiers.org/pubmed:"
    if pubmedid is None or pubmedid.startswith(prefix):
        # Nothing to do for missing IDs or complete links
        return pubmedid
    return prefix + pubmedid
# ! Imports and exports
[docs] @classmethod
def fromTemplate(cls, path: str):
    """Builds an EnzymeMLDocument from an EnzymeML spreadsheet template.

    Args:
        path (str): Path to the EnzymeML spreadsheet template.

    Returns:
        EnzymeMLDocument: Resulting EnzymeML document.
    """
    # The template reader receives the class so it can instantiate the document
    enzmldoc = read_template(path, cls)
    return enzmldoc
[docs] @staticmethod
def fromFile(path: str):
    """Initializes an EnzymeMLDocument from an OMEX container.

    Args:
        path (str): Path to the OMEX container.

    Returns:
        EnzymeMLDocument: The initialized EnzymeML document.
    """
    # Imported inside the function rather than at module level
    # NOTE(review): presumably to avoid a circular import - confirm
    from pyenzyme.enzymeml.tools.enzymemlreader import EnzymeMLReader

    reader = EnzymeMLReader()
    return reader.readFromFile(path)
[docs] @classmethod
def fromJSON(cls, json_string: str):
    """Initializes an EnzymeMLDocument from its JSON representation.

    The JSON is first parsed into a raw model. All content is then re-added
    to a fresh document through the regular 'add' methods, such that IDs,
    unit definitions and logs are generated the same way as for a manually
    built document.

    Args:
        json_string (str): JSON string describing an EnzymeML document.

    Returns:
        EnzymeMLDocument: The re-created EnzymeML document.
    """
    # First, use PyDantic to get a raw model
    enzmldoc = cls.parse_obj(json.loads(json_string))

    # Recreate to get unitDefs and logs
    nu_enzmldoc = cls(
        name=enzmldoc.name,
        level=enzmldoc.level,
        version=enzmldoc.version,
        pubmedid=enzmldoc.pubmedid,
        url=enzmldoc.url,
        doi=enzmldoc.doi,
        created=enzmldoc.created,
        modified=enzmldoc.modified,
    )

    # Creators
    for creator in enzmldoc.creator_dict.values():
        nu_enzmldoc.addCreator(creator)

    # Vessels
    for vessel in enzmldoc.vessel_dict.values():
        nu_enzmldoc.addVessel(vessel)

    # Proteins
    for protein in enzmldoc.protein_dict.values():
        nu_enzmldoc.addProtein(protein)

    # Reactants
    for reactant in enzmldoc.reactant_dict.values():
        nu_enzmldoc.addReactant(reactant)

    # Complexes
    for complex_species in enzmldoc.complex_dict.values():
        nu_enzmldoc._add_complex(complex_species)

    # Global parameters - these were previously lost on a JSON round trip.
    # They have to be present before the reactions are added, because
    # addReaction references global parameters by name.
    for parameter in enzmldoc.global_parameters.values():
        nu_enzmldoc.addGlobalParameter(
            name=parameter.name,
            value=parameter.value,
            initial_value=parameter.initial_value,
            unit=parameter.unit,
            constant=parameter.constant,
            upper=parameter.upper,
            lower=parameter.lower,
            stdev=parameter.stdev,
            ontology=parameter.ontology,
        )

    # Reactions
    for reaction in enzmldoc.reaction_dict.values():
        nu_enzmldoc.addReaction(reaction)

    # Measurements
    for measurement in enzmldoc.measurement_dict.values():
        nu_measurement = Measurement(
            name=measurement.name,
            temperature=measurement.temperature,
            temperature_unit=measurement.temperature_unit,
            ph=measurement.ph,
            global_time_unit=measurement.global_time_unit,
        )

        cls._parse_measurement_data(
            measurement, "proteins", nu_measurement, nu_enzmldoc
        )
        cls._parse_measurement_data(
            measurement, "reactants", nu_measurement, nu_enzmldoc
        )

        nu_enzmldoc.addMeasurement(nu_measurement)

    return nu_enzmldoc
@staticmethod
def _parse_measurement_data(measurement, key, nu_measurement, enzmldoc):
    """Copies the measurement data of one species group for the fromJSON method.

    Args:
        measurement: Parsed measurement whose 'species_dict' is read.
        key: Group inside 'species_dict' that is copied (fromJSON passes "proteins" and "reactants").
        nu_measurement: Measurement that receives the data and replicates.
        enzmldoc: Document handed over when adding the replicates.
    """
    species_data = measurement.species_dict[key]

    for entry in species_data.values():
        # Register the species first ...
        nu_measurement.addData(
            init_conc=entry.init_conc,
            unit=entry.unit,
            protein_id=entry.protein_id,
            reactant_id=entry.reactant_id,
        )
        # ... and attach its replicates afterwards
        nu_measurement.addReplicates(entry.replicates, enzmldoc=enzmldoc)
def toFile(self, path: str, name: Optional[str] = None):
    """Saves an EnzymeML document to an OMEX container at the specified path.

    Args:
        path (str): Path where the document should be saved.
        name (Optional[str], optional): Passed on to the writer together with the path. Defaults to None.
    """
    writer = EnzymeMLWriter()
    writer.toFile(self, path, name)
def toXMLString(self):
    """Generates an EnzymeML XML string.

    Returns:
        The result of the writer's 'toXMLString' for this document.
    """
    writer = EnzymeMLWriter()
    return writer.toXMLString(self)
[docs] @validate_arguments
def uploadToDataverse(
    self,
    dataverse_name: str,
    base_url: Optional[str] = None,
    api_token: Optional[str] = None,
):
    """Uploads an EnzymeML document to a Dataverse installation of choice.

    It should be noted, that the environment variables 'DATAVERSE_URL' and 'DATAVERSE_API_TOKEN'
    should be given appropriately before the upload, unless 'base_url' and 'api_token' are passed.

    Args:
        dataverse_name (str): Name of the dataverse to upload the EnzymeML document. You can find the name in the link of your dataverse (e.g. https://dataverse.installation/dataverse/{dataverseName})
        base_url (str): Base URL of the dataverse to upload. Defaults to None. If None the URL will be drawn from env vars.
        api_token (str): API Token of the dataverse to upload. Defaults to None. If None the API Token will be drawn from env vars.

    Raises:
        ModuleNotFoundError: If the dataverse upload module can not be imported.
    """
    try:
        # Optional dependency, hence only imported on demand
        from pyenzyme.enzymeml.databases.dataverse import (
            uploadToDataverse as upload_document,
        )
    except ModuleNotFoundError:
        raise ModuleNotFoundError(
            "PyDaRUS has not been found. Please make sure to install 'pyDaRUS' to use the dataverse upload."
        )

    upload_document(
        enzmldoc=self,
        dataverse_name=dataverse_name,
        base_url=base_url,
        api_token=api_token,
    )
# ! Utility methods
def visualize(
    self,
    measurement_ids: List[str] = ["all"],
    interactive: bool = False,
    use_names: bool = False,
    sharey: bool = True,
    col_wrap: int = 4,
    trendline: bool = False,
    width: int = 1000,
    height: int = 500,
    hovermode: str = "closest",
    **kwargs,
):
    """Visualizes either all or selected measurements found in the EnzymeML document as FacetGrid or interactive.

    In order to use this method correctly, make sure to pass nothing to 'measurement_ids' when all measurements
    should be visualised. Otherwise pass a list or string for multiple or single measurements respectively.

    Args:
        measurement_ids (List[str], optional): List of measurements that should be plotted or all. Defaults to ["all"].
        interactive (bool, optional): Whether to return an interactive or static plot. Defaults to False.
        use_names (bool, optional): Whether names or IDs should be used. Defaults to False.
        sharey (bool, optional): Whether all plots in FacetGrid should share the y-axis. Defaults to True.
        col_wrap (int, optional): Specifies in FacetGrid at which number of cols to create a new row. Defaults to 4.
        trendline (bool, optional): Whether the plot should include a trendline. Defaults to False.
        width (int, optional): Interactive plot width. Defaults to 1000.
        height (int, optional): Interactive plot height. Defaults to 500.
        hovermode (str, optional): Changes behaviour of hovering. Following options are available ['closest', 'x unified', 'x', 'y', 'y unified']. Defaults to 'closest'.
        **kwargs: Passed on to the function that creates the plot.

    Returns:
        The interactive figure if 'interactive' is True, otherwise the FacetGrid.
    """
    # The default list is only read, never mutated
    if isinstance(measurement_ids, str):
        measurement_ids = [measurement_ids]

    if interactive:
        # Allow for custom templates if specified; previously a given
        # 'template' was always overwritten by the default
        kwargs.setdefault("template", "plotly_white")

    df = self.toDataFrame(use_names=use_names, measurement_ids=measurement_ids)

    if interactive:
        return self._create_interactive_plot(
            df=df,
            trendline=trendline,
            width=width,
            height=height,
            hovermode=hovermode,
            **kwargs,
        )

    return self._create_facet_grid(
        df=df, trendline=trendline, col_wrap=col_wrap, sharey=sharey, **kwargs
    )
def _create_facet_grid(
    self,
    df: pd.DataFrame,
    col_wrap: int,
    sharey: bool,
    trendline: bool,
    **kwargs,
):
    """Visualizes all measurements as a static seaborn FacetGrid.

    Args:
        df (pd.DataFrame): Data providing the columns 'time', 'value', 'species' and 'measurement'.
        col_wrap (int): Number of columns after which a new row is started.
        sharey (bool): Whether all plots share the y-axis.
        trendline (bool): Whether a line plot is drawn in addition to the scatter plot.
        **kwargs: Passed on to 'sns.FacetGrid'.

    Returns:
        sns.FacetGrid: Grid with one facet per measurement, coloured by species.
    """
    grid = sns.FacetGrid(
        df,
        col="measurement",
        hue="species",
        col_wrap=col_wrap,
        sharey=sharey,
        legend_out=True,
        **kwargs,
    )

    # The line plot is only drawn on request; scatter points are always drawn
    plot_functions = [sns.lineplot, sns.scatterplot] if trendline else [sns.scatterplot]
    for plot_function in plot_functions:
        grid.map(plot_function, "time", "value")

    legend_options = {
        "loc": "upper right",
        "bbox_to_anchor": (0.5, -0.01),
        "fancybox": True,
        "shadow": True,
        "ncol": 2,
    }
    grid.add_legend(**legend_options)

    return grid
def _create_interactive_plot(
    self,
    df: pd.DataFrame,
    trendline: bool,
    width: int,
    height: int,
    hovermode: str,
    **kwargs,
):
    """Visualizes all measurements as an interactive plot based on plotly. Best used in Jupyter Notebooks.

    Args:
        df (pd.DataFrame): Data providing the columns 'time', 'value', 'species' and 'measurement'.
        trendline (bool): Whether or not a lowess trendline should be drawn.
        width (int): Plot width.
        height (int): Plot height.
        hovermode (str): Hover behaviour that is set in the figure layout.
        **kwargs: Passed on to 'px.scatter'.

    Returns:
        The plotly figure, animated over the measurements.
    """
    if trendline:
        kwargs["trendline"] = "lowess"
        kwargs["trendline_options"] = {"frac": 0.5}

    # Upper y-limit leaves one standard deviation of head room
    upper_limit = df.value.max() + df.value.std()

    figure = px.scatter(
        df,
        x="time",
        y="value",
        animation_frame="measurement",
        color="species",
        range_y=[-5, upper_limit],
        width=width,
        height=height,
        hover_name="species",
        **kwargs,
    )

    # Horizontal legend placed above the plot area
    legend_layout = {
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    }
    figure.update_layout(legend=legend_layout)
    figure.update_layout(hovermode=hovermode)

    return figure
def toDataFrame(
    self,
    measurement_ids: List[str] = ["all"],
    use_names: bool = False,
) -> pd.DataFrame:
    """Transforms exported measurement data to a single DataFrame

    Args:
        measurement_ids (List[str], optional): Measurements to include or all of them. A single ID may also be given as string. Defaults to ["all"].
        use_names (bool, optional): Whether names or IDs should be used. Defaults to False.

    Returns:
        pd.DataFrame: Transformed measurement data in long format with the columns 'time', 'species', 'value' and 'measurement'. Empty if no measurement matches.
    """
    # Transform single strings to list, consistent with the other methods
    # that accept measurement IDs (the default list is never mutated)
    if isinstance(measurement_ids, str):
        measurement_ids = [measurement_ids]

    include_all = measurement_ids == ["all"]

    # Columns that do not denote a species and thus are never renamed
    non_species_columns = ("time", "data_unit", "time_unit")

    # First export all the experimental data
    data = self.exportMeasurementData()

    # Reformat the dataframes for a FacetGrid plot
    frames = []
    for measurement_id, measurement in data.items():
        if not include_all and measurement_id not in measurement_ids:
            # Drop discarded measurements
            continue

        # Turn ID to name if specified
        label = (
            self.measurement_dict[measurement_id].name
            if use_names
            else measurement_id
        )

        # Get the dataframe from the data export
        exp_data = measurement["data"]

        if use_names:
            # Rename species columns to names
            exp_data.columns = [
                column
                if column in non_species_columns
                else self.getAny(column).name
                for column in exp_data.columns
            ]

        # Reduce DataFrame to three columns to hue individual species
        exp_data = pd.melt(exp_data, id_vars=["time"], var_name="species")
        exp_data["measurement"] = [label] * exp_data.shape[0]
        frames.append(exp_data)

    if not frames:
        # pd.concat raises on an empty list, return an empty frame instead
        return pd.DataFrame(columns=["time", "species", "value", "measurement"])

    # Finally, concatenate all individual datasets
    return pd.concat(frames)
def unifyMeasurementUnits(
    self, kind: str, scale: int, measurement_ids: Union[str, List[str]] = "all"
) -> None:
    """Rescales and unifies the units of either all measurements or those that are provided to the given kind and scale.

    Args:
        kind (str): The unit kind from which to rescale. Currently supported: 'mole', 'gram', 'litre'.
        scale (int): Decade scale to which the values will be rescaled.
        measurement_ids (Union[str, List[str]], optional): Measurements that will be rescaled. Defaults to "all".
    """
    # A single ID is treated as a list with one element
    if isinstance(measurement_ids, str):
        measurement_ids = [measurement_ids]

    rescale_all = measurement_ids == ["all"]

    for identifier, measurement in self.measurement_dict.items():
        if not (identifier in measurement_ids or rescale_all):
            continue
        measurement.unifyUnits(kind=kind, scale=scale, enzmldoc=self)
def exportMeasurementData(
    self,
    measurement_ids: Union[str, List[str]] = "all",
    species_ids: Union[str, List[str]] = "all",
    proteins: bool = True,
    reactants: bool = True,
) -> Dict[str, Dict[str, Union[Tuple, pd.DataFrame]]]:
    """Exports either all replicates present in any measurement or the ones specified via 'species_ids' or 'measurement_ids'

    Args:
        measurement_ids (Union[str, List[str]], optional): The measurements from which to export the data. Defaults to "all".
        species_ids (Union[str, List[str]], optional): The species from which to export the data. Defaults to "all".
        proteins (bool, optional): Whether protein data is part of the export. Defaults to True.
        reactants (bool, optional): Whether reactant data is part of the export. Defaults to True.

    Returns:
        Dict[str, Dict[str, Union[tuple, pd.DataFrame]]]: Mapping from measurement ID to a dictionary holding the exported "data" as DataFrame and the "initConc" entries. The mapping is keyed by measurement ID even if only a single measurement is requested.

    Raises:
        ValueError: If both 'proteins' and 'reactants' are False.
    """
    if proteins is False and reactants is False:
        raise ValueError(
            "Export of data needs at least one of 'protein' and 'reactants' specified. Otherwise no data can be exported."
        )

    # Transform single strings to lists
    if isinstance(measurement_ids, str):
        measurement_ids = [measurement_ids]
    if isinstance(species_ids, str):
        species_ids = [species_ids]

    export_all = measurement_ids == ["all"]

    replicate_data = {}
    for measurement_id, measurement in self.measurement_dict.items():
        if not (measurement_id in measurement_ids or export_all):
            continue

        data = measurement.exportData(species_ids=species_ids)

        # Collect the columns and initial concentrations of the requested groups.
        # Reactants go first; columns shared by both groups are taken from the proteins.
        columns = {}
        init_conc = {}
        for group, requested in (("reactants", reactants), ("proteins", proteins)):
            if requested:
                columns.update(data[group]["data"].to_dict())
                init_conc.update(data[group]["initConc"])

        replicate_data[measurement_id] = {
            "data": pd.DataFrame(columns),
            "initConc": init_conc,
        }

    # The former special case for a single measurement returned the very
    # same dictionary and has been removed as redundant
    return replicate_data
def exportKineticParameters(
    self, exclude_constant: bool = False, as_dataframe: bool = True
):
    """Exports all kinetic parameters found in the EnzymeMLDocument

    Local parameters are taken from every reaction that has a model,
    global parameters are listed under the reaction "global".

    Args:
        exclude_constant (bool, optional): Whether parameters marked as constant are left out. Defaults to False.
        as_dataframe (bool, optional): Currently not evaluated, the report is always returned as DataFrame. Defaults to True.

    Returns:
        pd.DataFrame: Parameter report indexed by "reaction". Empty if no parameters are present.
    """
    params = []

    # Export local parameters. The model has to be checked before its
    # parameters are accessed, reactions without a model are skipped.
    for reaction in self.reaction_dict.values():
        if not reaction.model:
            continue
        params.extend(
            {"reaction": reaction.id, **param.dict(exclude={"ontology"})}
            for param in reaction.model.parameters
            if not param.is_global
        )

    # Global parameters
    params.extend(
        {"reaction": "global", **param.dict(exclude={"ontology"})}
        for param in self.global_parameters.values()
    )

    if not params:
        # Without any row there is no "reaction" column to index by
        return pd.DataFrame(columns=["reaction"]).set_index("reaction")

    # Create param report
    param_report = pd.DataFrame(params).set_index("reaction", inplace=False)

    if exclude_constant:
        # Element-wise comparison, keeps only rows where 'constant' equals False
        return param_report[param_report["constant"].eq(False)]

    return param_report
@staticmethod
def _generateID(prefix: str, dictionary: dict) -> str:
    """Generates IDs complying to the [s|p|r|m|u|c]?[digit]+ schema.

    Args:
        prefix (str): Character denoting the type of species (p: Protein, s: Reactant, u: UnitDef, r: EnzymeReaction, m: Measurement, c: concentration).
        dictionary (dict): The dictionary from which the ID is generated and used to determine the number.

    Returns:
        str: Unique internal identifier.
    """
    if not dictionary.keys():
        # First entry of its kind
        return prefix + str(0)

    # Everything after the first character of a key is its running number
    highest_number = max(int(key[1::]) for key in dictionary.keys())

    return prefix + str(highest_number + 1)
def validateDocument(self, yaml_path: str) -> Tuple[Dict, bool]:
    """Validates an EnzymeML based on a given YAML file.

    The YAML file should be compliant with PyEnzymes template found on Github
    or generated via the EnzymeMLValidator instance. Ultimately, it can also
    be derived from a spreadsheet template, which can also be generated via
    the EnzymeMLValidator instance.

    Args:
        yaml_path (str): Path to the Validation YAML file

    Returns:
        Dict: Report on which fields are incompatible
        Bool: Whether or not the document is valid to the given YAML
    """
    # Read the scheme inside a context manager, such that the file handle
    # is closed again (it was left open before)
    with open(yaml_path) as file_handle:
        scheme = yaml.safe_load(file_handle)

    # Named to not shadow the pydantic 'validator' decorator
    document_validator = EnzymeMLValidator(scheme=scheme)

    return document_validator.validate(self)
def checkUnitConsistency(self, strict: bool = False, return_report: bool = True):
    """Validates unit consistency in an EnzymeMLDocument.

    This method will check whether all (initial) concentration units of a species
    are consistent throughout the document. Default mode only requires measurements and
    replicates to comply to the species unit.

    This can also be set to 'strict', where any species, measurement,
    replicate and parameter has to comply in a global fashion.
    To summarise, strict mode checks on:

        - Consistent usage of time
        - Consistent concentration units for ALL concentrations
        - Consistent volumetric unit including vessels

    Strict mode is of greatest importance for kinetic modeling, differing scales
    can lead to wrong results. However, the code will still run and only warnings
    will be given.

    Args:
        strict (bool, optional): Enables strict mode. Defaults to False.
        return_report (bool, optional): Whether a report should be returned. Defaults to True.

    Returns:
        Bool: Whether the document is consistent in units
        Dict: Report on which units are inconsistent (only if 'return_report' is True)
    """
    is_consistent, report = EnzymeMLValidator.check_unit_consistency(self, strict)

    if not return_report:
        return is_consistent

    return is_consistent, report
def __repr__(self):
    """
    Magic function returning the document overview as string.

    Returns:
        string: Same content as 'printDocument' generates, returned instead of printed
    """
    overview = self.printDocument(stdout=False)
    return overview
def printDocument(
    self, measurements: bool = False, units: bool = False, stdout: bool = True
) -> Optional[str]:
    """Prints the document's content

    Args:
        measurements (bool, optional): Whether the measurement overview is appended. Defaults to False.
        units (bool, optional): Whether the unit definitions are listed. Defaults to False.
        stdout (bool, optional): Whether the overview is printed. If False, it is returned instead. Defaults to True.

    Returns:
        Optional[str]: The overview if 'stdout' is False, otherwise None.
    """
    # Sections in the order they appear, units are listed first if requested
    sections = [
        (">>> Reactants", self.reactant_dict),
        (">>> Proteins", self.protein_dict),
        (">>> Complexes", self.complex_dict),
        (">>> Reactions", self.reaction_dict),
    ]
    if units:
        sections.insert(0, (">>> Units", self._unit_dict))

    lines: List[str] = [self.name]
    for header, dictionary in sections:
        lines.append(header)
        # One human readable line per element
        lines.extend(
            f"\tID: {element_id} \t Name: {element.name}"
            for element_id, element in dictionary.items()
        )

    if measurements:
        lines.append(">>> Measurements")
        lines.append(self.printMeasurements(stdout=False))

    output = "\n".join(lines)

    if not stdout:
        return output

    print(output)
    return None
def printMeasurements(self, stdout: bool = True):
    """Prints an overview of all measurements

    Args:
        stdout (bool, optional): Passed on to 'printMeasurementScheme' of every measurement. If False, the schemes are returned instead. Defaults to True.

    Returns:
        Optional[str]: The newline-joined schemes if 'stdout' is False, otherwise None.
    """
    schemes = [
        measurement.printMeasurementScheme(stdout=stdout)
        for measurement in self.measurement_dict.values()
    ]

    if stdout:
        return None

    return "\n".join(schemes)
def printReactionSchemes(self, by_name: bool = True):
    """Prints all reaction equations to inspect the content

    Args:
        by_name (bool, optional): Passed on to 'get_reaction_scheme' of every reaction. Defaults to True.

    Returns:
        Optional[pd.DataFrame]: Table of all schemes indexed by "ID" if 'in_ipynb' reports a notebook, otherwise None and the schemes are printed.
    """
    if len(self.reaction_dict) == 0:
        print(">> No reactions present in this EnzymeML Document.", file=sys.stderr)
        return

    in_notebook = self.in_ipynb()

    entries = []
    for reaction in self.reaction_dict.values():
        scheme = reaction.get_reaction_scheme(by_name=by_name, enzmldoc=self)

        if not in_notebook:
            entries.append(scheme)
            continue

        # The second and third line of the scheme are read as equation and kinetic law
        scheme_lines = scheme.split("\n")
        entries.append(
            {
                "ID": reaction.id,
                "Name": reaction.name,
                "equation": scheme_lines[1].replace("Equation: ", ""),
                "kinetic law": scheme_lines[2].replace("Model: v = ", ""),
            }
        )

    if in_notebook:
        return pd.DataFrame(entries).set_index("ID")

    print("\n".join(entries))
[docs] @staticmethod
def in_ipynb():
    """Checks whether in an ipynb or not

    Returns:
        bool: True if 'get_ipython' is defined and returns a 'ZMQInteractiveShell', otherwise False.
    """
    try:
        # 'get_ipython' is only defined when running inside IPython
        shell = get_ipython()  # noqa: F821
    except NameError:
        return False

    # The formerly unused read of 'shell.config' has been dropped
    return shell.__class__.__name__ == "ZMQInteractiveShell"
def generateInitialValueTemplate(self, dir: str = ".") -> None:
    """Generates an initial value template as a YAML file, which can be used for modeling.

    The file is named after the document, with spaces replaced by underscores
    and the suffix '_init_values.yaml'.

    Args:
        dir (str, optional): Dirpath to the output file. Defaults to ".".
    """

    def blank_entry(param) -> dict:
        """Creates the empty template entry of a single parameter."""
        return {
            "initial_value": None,
            "constant": param.constant,
            "upper": None,
            "lower": None,
        }

    # Global parameters are always listed first
    init_values = {
        "global": {
            param.name: blank_entry(param)
            for param in self.global_parameters.values()
        }
    }

    for reaction in self.reaction_dict.values():
        if reaction.model is None:
            continue

        # Global parameters are already covered by the "global" section
        local_parameters = {
            param.name: blank_entry(param)
            for param in reaction.model.parameters
            if param.is_global is False
        }

        if local_parameters:
            init_values[reaction.id] = local_parameters

    # Finally, write the template to YAML
    filename = self.name.replace(" ", "_") + "_init_values.yaml"
    with open(os.path.join(dir, filename), "w") as file_handle:
        yaml.dump(
            init_values, file_handle, default_flow_style=False, sort_keys=False
        )
def applyModelInitialization(self, path: str, to_values: bool = False) -> None:
    """Adds initial values per reaction to the model from a YAML config file.

    This method loads a YAML that was previously generated by the function 'generateInitialValueTemplate'
    and was filled with values. These are then used to populate the 'initial_value' fields of KineticParameter objects.

    Args:
        path (str): Path to the YAML file containing the initial values.
        to_values (bool, optional): Whether the initial values are also written to the 'value' fields. Defaults to False.
    """
    # Load the YAML file
    with open(path, "r") as file_handle:
        initial_values = yaml.safe_load(file_handle)

    # Apply all given initial values to the model
    for reaction_id, value_dict in initial_values.items():
        if reaction_id != "global":
            # Local parameters are handled by the reaction itself
            reaction = self.getReaction(reaction_id)
            reaction.apply_initial_values(value_dict, to_values=to_values)
            continue

        for name, options in value_dict.items():
            parameter = self.global_parameters[name]

            if to_values:
                parameter.value = options.get("initial_value")

            parameter.initial_value = options.get("initial_value")
            parameter.upper = options.get("upper")
            parameter.lower = options.get("lower")
            parameter.constant = options.get("constant")
# ! Add methods
[docs] @validate_arguments
def addGlobalParameter(
    self,
    name: str,
    value: Optional[float] = None,
    initial_value: Optional[float] = None,
    unit: Optional[str] = None,
    constant: bool = False,
    upper: Optional[float] = None,
    lower: Optional[float] = None,
    stdev: Optional[float] = None,
    ontology: Optional[SBOTerm] = None,
):
    """Adds a global parameter to the model that will be referred by KineticModel objects in reaction models.

    Args:
        name (str): Name of the estimated parameter.
        value (Optional[float], optional): Numerical value of the estimated parameter. Defaults to None.
        initial_value (Optional[float], optional): Initial value that was used for the parameter estimation. Defaults to None.
        unit (Optional[str], optional): Unit of the estimated parameter. Defaults to None.
        constant (bool, optional): Stored in the 'constant' field of the parameter. Defaults to False.
        upper (Optional[float], optional): Stored in the 'upper' field of the parameter. Defaults to None.
        lower (Optional[float], optional): Stored in the 'lower' field of the parameter. Defaults to None.
        stdev (Optional[float], optional): Standard deviation of the estimated parameter. Defaults to None.
        ontology (Optional[SBOTerm], optional): Type of the estimated parameter. Defaults to None.

    Returns:
        str: Name of the parameter that has been added.
    """
    global_parameter = KineticParameter(
        name=name,
        value=value,
        unit=unit,
        stdev=stdev,
        initial_value=initial_value,
        ontology=ontology,
        is_global=True,
        constant=constant,
        upper=upper,
        lower=lower,
    )

    if global_parameter.unit:
        # Register the unit and store the name of the resulting unit definition
        global_parameter._unit_id = self._convertToUnitDef(global_parameter.unit)
        unit_definition = self._unit_dict[global_parameter._unit_id]
        global_parameter.unit = unit_definition._get_unit_name()

    # Assign the current EnzymeMLDocument
    global_parameter._enzmldoc = self

    # Global parameters are stored by their name
    self.global_parameters[global_parameter.name] = global_parameter

    return global_parameter.name
[docs] @validate_arguments
def addCreator(self, creator: Creator, log: bool = True) -> str:
    """Adds a creator object to the EnzymeML document.

    Args:
        creator (Creator): Creator object to be added to the document.
        log (bool, optional): Whether the addition is logged. Defaults to True.

    Returns:
        str: Unique internal identifier of the creator.
    """
    # Creator IDs carry the prefix "a"
    creator.id = self._generateID(prefix="a", dictionary=self.creator_dict)

    # Add to the document
    self.creator_dict[creator.id] = creator

    if not log:
        return creator.id

    # Log creator object
    log_object(logger, creator)
    logger.debug(
        f"Added {type(creator).__name__} ({creator.id}) '{creator.family_name}' to document '{self.name}'"
    )

    return creator.id
[docs] @validate_arguments
def addVessel(self, vessel: Vessel, use_parser: bool = True) -> str:
    """Adds a Vessel object to the EnzymeML document.

    Args:
        vessel (Vessel): Vessel object to be added to the document.
        use_parser (bool, optional): Whether to use the unit parser or not. Defaults to True.

    Returns:
        str: Unique internal identifier of the vessel.
    """
    # Vessel IDs carry the prefix "v"
    vessel_id = self._addSpecies(
        species=vessel,
        prefix="v",
        dictionary=self.vessel_dict,
        use_parser=use_parser,
    )

    return vessel_id
[docs] @validate_arguments
def addReactant(self, reactant: Reactant, use_parser: bool = True) -> str:
    """Adds a Reactant object to the EnzymeML document.

    Args:
        reactant (Reactant): Reactant object to be added to the document.
        use_parser (bool, optional): Whether to use the unit parser or not. Defaults to True.

    Returns:
        str: Unique internal identifier of the reactant.
    """
    # Reactant IDs carry the prefix "s"
    reactant_id = self._addSpecies(
        species=reactant,
        prefix="s",
        dictionary=self.reactant_dict,
        use_parser=use_parser,
    )

    return reactant_id
[docs] @validate_arguments
def addProtein(self, protein: Protein, use_parser: bool = True) -> str:
    """Adds a Protein object to the EnzymeML document.

    Args:
        protein (Protein): Protein object to be added to the document.
        use_parser (bool, optional): Whether to use the unit parser or not. Defaults to True.

    Returns:
        str: Unique internal identifier of the protein.
    """
    # Protein IDs carry the prefix "p"
    protein_id = self._addSpecies(
        species=protein,
        prefix="p",
        dictionary=self.protein_dict,
        use_parser=use_parser,
    )

    return protein_id
@validate_arguments
def _add_complex(self, complex: Complex, use_parser: bool = True) -> str:
    """Adds a Complex object to the EnzymeML document.

    Args:
        complex (Complex): Complex object to be added to the document.
        use_parser (bool, optional): Whether to use the unit parser or not. Defaults to True.

    Returns:
        str: Unique internal identifier of the complex.
    """
    # Complex IDs carry the prefix "c"
    complex_id = self._addSpecies(
        species=complex,
        prefix="c",
        dictionary=self.complex_dict,
        use_parser=use_parser,
    )

    return complex_id
[docs] @validate_arguments
def addComplex(
    self,
    name: str,
    participants: List[str],
    vessel_id: str,
    init_conc: Optional[float] = None,
    unit: Optional[str] = None,
):
    """Creates a Complex from the given species and adds it to the EnzymeML document.

    Args:
        name (str): Name of the complex.
        participants (List[str]): Species forming the complex, each resolved via 'getAny'.
        vessel_id (str): Identifier of the vessel the complex is assigned to.
        init_conc (Optional[float], optional): Initial concentration of the complex. Defaults to None.
        unit (Optional[str], optional): Unit of the initial concentration. Defaults to None.

    Returns:
        str: Unique internal identifier of the complex.
    """
    # Resolve every participant to the ID of the matching species
    participant_ids = [self.getAny(participant).id for participant in participants]

    new_complex = Complex(
        name=name,
        participants=participant_ids,
        vessel_id=vessel_id,
        init_conc=init_conc,
        unit=unit,
    )

    return self._add_complex(new_complex)
def _addSpecies(
    self,
    species: Union[AbstractSpecies, Vessel],
    prefix: str,
    dictionary: dict,
    use_parser: bool = True,
    log: bool = True,
) -> str:
    """Helper function to add any specific species to the EnzymeML document.

    Args:
        species (AbstractSpecies): Species that is about to be added to the EnzymeML document.
        prefix (str): Character that is used to generate a unique internal identifier.
        dictionary (dict): The dictionary where the species will be added to.
        use_parser (bool, optional): Whether to use the unit parser or not. If False, the unit of the species is taken as unit ID. Defaults to True.
        log (bool, optional): Whether the addition is written to the debug log. Defaults to True.

    Returns:
        str: The internal identifier of the species.
    """
    # Generate ID and the corresponding meta ID
    species.id = self._generateID(prefix=prefix, dictionary=dictionary)
    species.meta_id = f"METAID_{species.id.upper()}"

    # Update unit to UnitDefID
    if species.unit:
        if use_parser:
            # The unit string is parsed into a unit definition
            species._unit_id = self._convertToUnitDef(species.unit)
            species.unit = self._unit_dict[species._unit_id]._get_unit_name()
        elif use_parser is False:
            # The given unit already is the ID of a unit definition
            species._unit_id = species.unit
            species.unit = self.getUnitString(species._unit_id)
            species.unit = self._unit_dict[species._unit_id]._get_unit_name()

    # Log creation of the object (done regardless of 'log')
    log_object(logger, species)

    # Set the current document to the object attribute
    # _enzmldoc to allow unit changes
    species._enzmldoc = self

    # Add species to dictionary
    dictionary[species.id] = species

    # Log the addition
    if log:
        logger.debug(
            f"Added {type(species).__name__} ({species.id}) '{species.name}' to document '{self.name}'"
        )

    return species.id
def addReaction(self, reaction: EnzymeReaction, use_parser: bool = True) -> str:
    """
    Adds EnzymeReaction object to EnzymeMLDocument object.
    Automatically assigns ID and converts units.

    Args:
        reaction (EnzymeReaction): Object describing reaction
        use_parser (bool, optional): If set True, will use
                                     internal unit parser.
                                     Defaults to True.

    Returns:
        string: Internal identifier for the reaction.
        Use it for other objects!
    """
    # Generate ID
    reaction.id = self._generateID("r", self.reaction_dict)
    reaction.meta_id = f"METAID_{reaction.id.upper()}"

    # Explicit None check: a temperature of 0 is a valid value and was
    # previously skipped by the truthiness test
    has_temperature = reaction.temperature is not None

    if use_parser and has_temperature:
        # Reset temperature for SBML compliance to Kelvin
        if re.match(r"^c|celsius", reaction.temperature_unit.lower()):
            reaction.temperature = reaction.temperature + 273.15

        # Generate internal ID for the unit
        reaction._temperature_unit_id = self._convertToUnitDef(
            reaction.temperature_unit
        )
    elif has_temperature:
        # The given unit is taken as unit ID and replaced by the actual string
        reaction._temperature_unit_id = reaction.temperature_unit
        reaction.temperature_unit = self.getUnitString(reaction.temperature_unit)

    # Set model units and check for consistency
    if reaction.model:
        # ID consistency
        self._check_kinetic_model_ids(model=reaction.model)

        # Reference global parameters
        self._reference_global_parameters(model=reaction.model)

        # Unit conversion
        self._convert_kinetic_model_units(reaction.model.parameters, enzmldoc=self)

    # Finally add the reaction to the document and assign the doc
    reaction._enzmldoc = self
    self.reaction_dict[reaction.id] = reaction

    # Log the object
    log_object(logger, reaction)
    logger.debug(
        f"Added {type(reaction).__name__} ({reaction.id}) '{reaction.name}' to document '{self.name}'"
    )

    return reaction.id
def _check_kinetic_model_ids(self, model) -> None:
"""Checks if the given species IDs/names are consistent with the EnzymeML document. Also converts names into IDs, if given in the document.
Args:
equation (str): The rate law given in the KineticModel
"""
# Get all the params of the model to distinguis params from names
all_params = [param.name for param in model.parameters]
for node in ast.walk(ast.parse(model.equation)):
if isinstance(node, ast.Constant):
# If the equation was generated via the ModelFactory
if isinstance(node.value, str):
name = repr(node.value)
else:
# Numeric constants are ignored now
continue
elif isinstance(node, ast.Str):
# Python 3.7 compatibility since ast.Constant is ast.Str here
name = node.s
elif isinstance(node, ast.Name):
# If the equation has been done manually
name = node.id
if name in all_params:
# If its a parameter
continue
else:
continue
if name not in self.getSpeciesIDs():
try:
# Try to get by name and substitute in equation
species_id = self.getAny(
name.replace("'", ""),
).id
if not name.startswith("'"):
name = f"'{name}'"
model.equation = model.equation.replace(name, species_id)
except StopIteration:
# If neither name or ID is found, raise Error
raise SpeciesNotFoundError(
enzymeml_part="Kinetic Model", species_id=name
)
elif not bool(re.match(r"'[a-zA-Z\d]*'", name)):
quoted = f"'{name}'"
model.equation = model.equation.replace(quoted, name)
def _reference_global_parameters(self, model):
"""Removes single parameters and references global parameters if names match"""
nu_parameters = []
for parameter in model.parameters:
name = parameter.name
if name in self.global_parameters:
nu_parameters.append(self.global_parameters[name])
else:
nu_parameters.append(parameter)
model.parameters = nu_parameters
@staticmethod
def _convert_kinetic_model_units(
parameters: List[KineticParameter], enzmldoc
) -> None:
"""Converts given unit strings to unit IDs and adds them to the model.
Args:
parameters (List[KineticParameter]): List of all kinetic parameters.
enzmldoc ([type]): Used to convert unit strings to unit IDs.
"""
for parameter in parameters:
if parameter.unit:
parameter._unit_id = enzmldoc._convertToUnitDef(parameter.unit)
parameter.unit = enzmldoc._unit_dict[
parameter._unit_id
]._get_unit_name()
parameter._enzmldoc = enzmldoc
def addReactions(self, reactions: List[EnzymeReaction]):
    """Adds several reactions to the EnzymeML document.

    Args:
        reactions (List[EnzymeReaction]): Reactions that are added in order.

    Returns:
        dict: Maps each reaction name to the ID returned by 'addReaction'.
    """

    name_to_id = {}
    for single_reaction in reactions:
        name_to_id[single_reaction.name] = self.addReaction(single_reaction)

    return name_to_id
def addFile(self, filepath=None, file_handle=None, description="Undefined") -> str:
    """Adds any arbitrary file to the document.

    If a filepath is given, any file_handle will be ignored. The handle is
    stored in the file dictionary and is NOT closed by this method.

    Args:
        filepath (str, optional): Path to the file that is added to the document. Defaults to None.
        file_handle (io.BufferedReader, optional): Open handle of the file; it needs a 'name' attribute. Defaults to None.
        description (str, optional): Description stored with the file. Defaults to "Undefined".

    Raises:
        ValueError: If neither a usable file path nor a file handle is given.

    Returns:
        str: Internal identifier for the file.
    """

    if filepath:
        # Open in binary mode; the handle is kept open for later use
        file_handle = open(filepath, "rb")
    elif file_handle is None:
        # Also covers empty paths, which would otherwise fail on 'None.name'
        raise ValueError("Please specify either a file path or a file handle")

    # Generate a unique identifier for the file
    file_id = self._generateID("f", self.file_dict)

    # Register the file under its base name
    self.file_dict[file_id] = {
        "name": os.path.basename(file_handle.name),
        "handler": file_handle,
        "description": description,
    }

    return file_id
@validate_arguments
def addMeasurement(self, measurement: Measurement) -> str:
    """Adds a measurement to the EnzymeML document.

    Runs the consistency check and the unit conversion, assigns a new
    measurement ID and passes this ID on to the species data and replicates.

    Args:
        measurement (Measurement): Collection of data and initial concentrations per reaction.

    Returns:
        str: Assigned measurement identifier.
    """

    # Couple the measurement to this document, so sub-elements can change
    # units compliant to the UnitDefinitions
    measurement._enzmldoc = self

    # Validate and convert before an ID is assigned
    self._checkMeasurementConsistency(measurement)
    self._convertMeasurementUnits(measurement)

    # Assign the identifier
    measurement.id = self._generateID(prefix="m", dictionary=self.measurement_dict)

    # Propagate the identifier, proteins first and reactants second
    for species_type in ("proteins", "reactants"):
        self._updateReplicateMeasurementIDs(
            measurement.species_dict[species_type], measurement.id
        )

    # Store the measurement
    self.measurement_dict[measurement.id] = measurement

    # Report what has been added
    log_object(logger, measurement)
    logger.debug(
        f"Added {type(measurement).__name__} ({measurement.id}) '{measurement.name}' to document '{self.name}'"
    )

    return measurement.id
def _convertMeasurementUnits(self, measurement: Measurement) -> None:
"""Converts string SI units to UnitDef objects and IDs
Args:
measurement (Measurement): Object defining a measurement
"""
# Update global time of the measurement
if measurement.global_time:
measurement._global_time_unit_id = self._convertToUnitDef(
measurement.global_time_unit
)
# Set correct string
measurement.global_time_unit = self._unit_dict[
measurement._global_time_unit_id
]._get_unit_name()
# Update temperature unit of the measurement
if measurement.temperature_unit:
measurement._temperature_unit_id = self._convertToUnitDef(
measurement.temperature_unit
)
# Set correct string
measurement.temperature_unit = self._unit_dict[
measurement._temperature_unit_id
]._get_unit_name()
def update_dict_units(
measurement_data_dict: Dict[str, MeasurementData], measurement: Measurement
) -> None:
"""Helper function to update units and assignment of the coupled EnzymeMLDocument"""
for measurement_data in measurement_data_dict.values():
# Assign the measurements enzmldoc
measurement_data._enzmldoc = measurement._enzmldoc
measurement_data._unit_id = self._convertToUnitDef(
measurement_data.unit
)
global_time = self._convertReplicateUnits(measurement_data)
if global_time:
measurement.global_time = global_time
# Perform update
update_dict_units(measurement.species_dict["proteins"], measurement)
update_dict_units(measurement.species_dict["reactants"], measurement)
def _convertReplicateUnits(
self, measurement_data: MeasurementData
) -> Optional[List[float]]:
"""Converts replicate unit strings to unit definitions.
Args:
measurement_data (MeasurementData): Object holding measurement data for a species
"""
# TODO verify globally global time
global_time = None
for replicate in measurement_data.replicates:
# Assign the EnzymeML document for compliant changes
# of units when already added to the document
replicate._enzmldoc = measurement_data._enzmldoc
# Convert unit
time_unit_id = self._convertToUnitDef(replicate.time_unit)
data_unit_id = self._convertToUnitDef(replicate.data_unit)
# Assign unit IDs
replicate._data_unit_id = data_unit_id
replicate._time_unit_id = time_unit_id
global_time = replicate.time
return global_time
def _updateReplicateMeasurementIDs(
self, measurement_data_dict: Dict[str, MeasurementData], measurement_id: str
):
"""Updates the measurement IDs of replicates."""
for measurement_data in measurement_data_dict.values():
measurement_data.measurement_id = measurement_id
replicates = measurement_data.replicates
for replicate in replicates:
replicate.measurement_id = measurement_id
def _checkMeasurementConsistency(self, measurement: Measurement) -> None:
"""Checks if the used species in the measurement are consistent with the EnzymeML document.
Args:
measurement (MeasurementData): Objech holding measurement data for a species.
"""
map(self._checkSpecies, measurement.species_dict["reactants"])
map(self._checkSpecies, measurement.species_dict["proteins"])
def _checkSpecies(self, species_id):
"""Checks if a species is defined in the EnzymeML document.
Args:
species_id (str): Unique identifier of the species.
Raises:
SpeciesNotFoundError: Raised when a species is not defined in the EnzymeML document.
"""
all_species = {**self.reactant_dict, **self.protein_dict, **self.complex_dict}
if species_id not in all_species.keys():
# Retrieve species for ontology
species = self._getSpecies(
id=species_id,
dictionary=all_species,
element_type="Proteins/Reactants/Complexes",
)
# Use the EnzymeMLPart Enum to derive the correct place
sbo_term = SBOTerm(species.__dict__["ontology"]).name
enzymeml_part = EnzymeMLPart.partFromSBOTerm(sbo_term)
# Raise an error if the species is nowhere present
raise SpeciesNotFoundError(
species_id=species_id, enzymeml_part=enzymeml_part
)
def _convertToUnitDef(self, unit: Optional[str]) -> str:
"""Reads an SI unit string and converts it into a EnzymeML compatible UnitDef
Args:
unit (str): String representing the SI unit.
Returns:
str: Unique identifier of the UnitDef.
"""
if unit is None:
raise TypeError("No unit given.")
elif unit in self._unit_dict.keys():
return unit
return UnitCreator().getUnit(unit, self)
# ! Getter methods
def getSpeciesIDs(self) -> List[str]:
    """Returns the IDs of all proteins, reactants and complexes.

    Returns:
        List[str]: Keys of the protein, reactant and complex dictionaries,
        in this order and without duplicates.
    """

    merged = dict(self.protein_dict)
    merged.update(self.reactant_dict)
    merged.update(self.complex_dict)

    return list(merged)
def getUnitString(self, unit_id: Optional[str]) -> str:
    """Returns the name of the unit stored under the given unit ID.

    Args:
        unit_id (str): Unique internal ID of the unit.

    Raises:
        TypeError: Raised when no unit ID is given.
        SpeciesNotFoundError: Raised when the requested unit is not found.

    Returns:
        str: String representation of the unit.
    """

    if unit_id is None:
        raise TypeError("No unit given.")

    try:
        unit_name = self._unit_dict[unit_id].name
    except KeyError:
        raise SpeciesNotFoundError(species_id=unit_id, enzymeml_part="Units")

    return unit_name
def getUnitDef(self, id: str) -> UnitDef:
    """Looks up a unit definition in the unit dictionary.

    Args:
        id (str): Unique internal ID of the unit.

    Raises:
        SpeciesNotFoundError: Raised when the requested unit is not found.

    Returns:
        UnitDef: The corresponding unit object.
    """

    lookup = dict(id=id, dictionary=self._unit_dict, element_type="Units")
    return self._getSpecies(**lookup)
def getVessel(self, id: str) -> Vessel:
    """Looks up a vessel in the vessel dictionary.

    Args:
        id (str): Unique internal ID of the vessel.

    Raises:
        SpeciesNotFoundError: Raised when the requested vessel is not found.

    Returns:
        Vessel: The corresponding vessel object.
    """

    lookup = dict(id=id, dictionary=self.vessel_dict, element_type="Vessels")
    return self._getSpecies(**lookup)
def getReaction(self, id: str) -> EnzymeReaction:
    """Looks up a reaction in the reaction dictionary.

    Args:
        id (str): Unique internal ID of the reaction.

    Raises:
        SpeciesNotFoundError: Raised when the requested reaction is not found.

    Returns:
        EnzymeReaction: The corresponding reaction object.
    """

    lookup = dict(
        id=id, dictionary=self.reaction_dict, element_type="EnzymeReaction"
    )
    return self._getSpecies(**lookup)
def getMeasurement(self, id: str) -> Measurement:
    """Looks up a measurement in the measurement dictionary.

    Args:
        id (str): Unique internal ID of the measurement.

    Raises:
        SpeciesNotFoundError: Raised when the requested measurement is not found.

    Returns:
        Measurement: The corresponding measurement object.
    """

    lookup = dict(
        id=id, dictionary=self.measurement_dict, element_type="Measurement"
    )
    return self._getSpecies(**lookup)
def getReactant(self, id: str) -> Reactant:
    """Looks up a reactant in the reactant dictionary.

    Args:
        id (str): Unique internal ID of the reactant.

    Raises:
        SpeciesNotFoundError: Raised when the requested reactant is not found.

    Returns:
        Reactant: The corresponding reactant object.
    """

    lookup = dict(id=id, dictionary=self.reactant_dict, element_type="Reactant")
    return self._getSpecies(**lookup)
def getProtein(self, id: str) -> Protein:
    """Looks up a protein in the protein dictionary.

    Args:
        id (str): Unique internal ID of the protein.

    Raises:
        SpeciesNotFoundError: Raised when the requested protein is not found.

    Returns:
        Protein: The corresponding protein object.
    """

    lookup = dict(id=id, dictionary=self.protein_dict, element_type="Protein")
    return self._getSpecies(**lookup)
def getFile(self, id: str, by_id: bool = True) -> dict:
    """Returns the file entry for the given ID or file name.

    Args:
        id (str): Unique internal ID of the file, or its name if 'by_id' is False.
        by_id (bool, optional): If True, 'id' is used as key of the file
            dictionary. If False, the first entry whose "name" equals 'id'
            is returned. Defaults to True.

    Raises:
        KeyError: Raised when searching by ID and the ID is unknown.
        StopIteration: Raised when searching by name and no entry matches.

    Returns:
        dict: The corresponding file entry.
    """

    if not by_id:
        # Search by name: the first matching entry wins
        return next(
            entry for entry in self.file_dict.values() if entry["name"] == id
        )

    return self.file_dict[id]
def getAny(self, id: str) -> AbstractSpecies:
    """Returns the object associated with the given ID or name.

    Searches units, vessels, reactants, proteins, complexes and reactions.

    Args:
        id (str): Unique internal ID (or name) of the object.

    Raises:
        SpeciesNotFoundError: Raised when the requested object is not found.

    Returns:
        The matching object of the document.
    """

    # Merge in a fixed order; on equal keys later dictionaries take precedence
    searchable = {}
    for source in (
        self._unit_dict,
        self.vessel_dict,
        self.reactant_dict,
        self.protein_dict,
        self.complex_dict,
        self.reaction_dict,
    ):
        searchable.update(source)

    return self._getSpecies(id=id, dictionary=searchable, element_type="Document")
def _getSpecies(
self,
id: str,
dictionary: dict,
element_type: str,
):
"""Helper function to retrieve any kind of species from the EnzymeML document.
Args:
id (str): Unique internal ID.
dictionary (dict): Dictionary that stores all objects.
element_type (str): Type of object that is in the dictionary.
Raises:
SpeciesNotFoundError: Raised when the requested species is not found.
Returns:
Union[ AbstractSpecies, EnzymeReaction, Measurement ]: The requested object
"""
for attr in ["id", "name"]:
species = self._search_object(value=id, attr=attr, dictionary=dictionary)
if species:
return species
raise SpeciesNotFoundError(species_id=id, enzymeml_part=element_type)
def _search_object(self, value, attr: str, dictionary: dict):
"""Filters a given dictionary for an attributes and returns it if found.
Args:
value ([type]): Term that is searched for.
attr (str): Corresponding attribute to look for.
dictionary (dict): Dictionary that si searched.
Returns:
AbstractSpecies: Species from the EnzymeML document.
"""
try:
# Filter the dict for the desired species
return next(
filter(lambda obj: obj.__dict__[attr] == value, dictionary.values())
)
except StopIteration:
return None
def getReactantList(self) -> List[Reactant]:
    """Returns a list of all reactants in the EnzymeML document.

    Returns:
        List[Reactant]: All values of the reactant dictionary.
    """

    reactants = self._getSpeciesList(self.reactant_dict)
    return reactants
def getProteinList(self) -> List[Protein]:
    """Returns a list of all proteins in the EnzymeML document.

    Returns:
        List[Protein]: All values of the protein dictionary.
    """

    proteins = self._getSpeciesList(self.protein_dict)
    return proteins
def getReactionList(self) -> List[EnzymeReaction]:
    """Returns a list of all reactions in the EnzymeML document.

    Returns:
        List[EnzymeReaction]: All values of the reaction dictionary.
    """

    reactions = self._getSpeciesList(self.reaction_dict)
    return reactions
def getFilesList(self):
    """Returns a list of all files in the EnzymeML document.

    Returns:
        List[dict]: All values of the file dictionary.
    """

    files = self._getSpeciesList(self.file_dict)
    return files
@staticmethod
def _getSpeciesList(dictionary: dict) -> list:
"""Helper function to retrieve lists of dicitonary objects
Args:
dictionary (dict): Dictionary of corresponding elements
Returns:
list: Returns all values in the dictionary
"""
return list(dictionary.values())
@deprecated_getter("doi")
def getDoi(self) -> Optional[str]:
    """Deprecated getter for the 'doi' attribute."""

    return self.doi
@deprecated_getter("pubmedid")
def getPubmedID(self) -> Optional[str]:
    """Deprecated getter for the 'pubmedid' attribute."""

    return self.pubmedid
@deprecated_getter("url")
def getUrl(self) -> Optional[str]:
    """Deprecated getter for the 'url' attribute."""

    return self.url
@deprecated_getter("created")
def get_created(self):
    """Deprecated getter for the 'created' attribute."""

    return self.created
@deprecated_getter("modified")
def getModified(self):
    """Deprecated getter for the 'modified' attribute."""

    return self.modified
@deprecated_getter("creators")
def getCreator(self):
    """Deprecated getter that returns the 'creator_dict' attribute."""

    # NOTE(review): the decorator is given "creators" while 'creator_dict'
    # is returned - confirm that this name is the intended one
    return self.creator_dict
@deprecated_getter("name")
def getName(self):
    """Deprecated getter for the 'name' attribute."""

    return self.name
@deprecated_getter("level")
def getLevel(self):
    """Deprecated getter for the 'level' attribute."""

    return self.level
@deprecated_getter("version")
def getVersion(self):
    """Deprecated getter for the 'version' attribute."""

    return self.version
@deprecated_getter("protein_dict")
def getProteinDict(self):
    """Deprecated getter for the 'protein_dict' attribute."""

    return self.protein_dict
@deprecated_getter("reactant_dict")
def getReactantDict(self):
    """Deprecated getter for the 'reactant_dict' attribute."""

    return self.reactant_dict
@deprecated_getter("reaction_dict")
def getReactionDict(self):
    """Deprecated getter for the 'reaction_dict' attribute."""

    return self.reaction_dict
@deprecated_getter("measurement_dict")
def getMeasurementDict(self):
    """Deprecated getter for the 'measurement_dict' attribute."""

    return self.measurement_dict
@deprecated_getter("unit_dict")
def getUnitDict(self):
    """Deprecated getter that returns the private '_unit_dict' attribute."""

    # NOTE(review): the decorator is given "unit_dict" while '_unit_dict'
    # is returned - confirm that this name is the intended one
    return self._unit_dict
@deprecated_getter("file_dict")
def getFileDict(self):
    """Deprecated getter for the 'file_dict' attribute."""

    return self.file_dict