# File: enzymemlreader.py
# Project: tools
# Author: Jan Range
# License: BSD-2 clause
# Copyright (c) 2022 Institute of Biochemistry and Technical Biochemistry Stuttgart
import os
import re
from typing import Dict, List, Tuple, Union, Optional
import libsbml
import xml.etree.ElementTree as ET
import pandas as pd
import tempfile
from pyenzyme.enzymeml.core.creator import Creator
from pyenzyme.enzymeml.core.enzymemldocument import EnzymeMLDocument
from pyenzyme.enzymeml.core.protein import Protein
from pyenzyme.enzymeml.core.complex import Complex
from pyenzyme.enzymeml.core.reactant import Reactant
from pyenzyme.enzymeml.core.replicate import Replicate
from pyenzyme.enzymeml.core.unitdef import UnitDef
from pyenzyme.enzymeml.core.vessel import Vessel
from pyenzyme.enzymeml.core.measurement import Measurement
from pyenzyme.enzymeml.core.enzymereaction import EnzymeReaction, ReactionElement
from pyenzyme.enzymeml.models.kineticmodel import KineticModel, KineticParameter
from pyenzyme.enzymeml.core.ontology import DataTypes, EnzymeMLPart, SBOTerm
from pyenzyme.enzymeml.core.abstract_classes import (
AbstractSpeciesFactory,
AbstractSpecies,
)
from libsbml import SBMLReader
from libcombine import CombineArchive
from io import StringIO
# ! Factories
[docs]class ReactantFactory(AbstractSpeciesFactory):
"""Returns an un-initialized reactant species object"""
enzymeml_part: str = "reactant_dict"
[docs] def get_species(self, **kwargs) -> AbstractSpecies:
reactant = Reactant(**kwargs)
reactant._unit_id = kwargs["_unit_id"]
return reactant
[docs]class ProteinFactory(AbstractSpeciesFactory):
"""Returns an un-initialized protein species object"""
enzymeml_part: str = "protein_dict"
[docs] def get_species(self, **kwargs) -> AbstractSpecies:
protein = Protein(**kwargs)
protein._unit_id = kwargs["_unit_id"]
return protein
[docs]class ComplexFactory(AbstractSpeciesFactory):
"""Returns an un-initialized complex species object"""
enzymeml_part: str = "complex_dict"
[docs] def get_species(self, **kwargs) -> AbstractSpecies:
complex = Complex(**kwargs)
complex._unit_id = kwargs["_unit_id"]
return complex
[docs]def species_factory_mapping(sbo_term: str) -> AbstractSpeciesFactory:
"""Maps from SBOTerms to the appropriate species using a factory"""
# Get the enum entity for the mapping
entity = EnzymeMLPart.entityFromSBOTerm(sbo_term)
factory_mapping = {
"PROTEIN": ProteinFactory(),
"SMALL_MOLECULE": ReactantFactory(),
"ION": ReactantFactory(),
"RADICAL": ReactantFactory(),
"MACROMOLECULAR_COMPLEX": ComplexFactory(),
"PROTEIN_COMPLEX": ComplexFactory(),
"DIMER": ComplexFactory(),
}
return factory_mapping[entity]
[docs]class EnzymeMLReader:
[docs] def readFromFile(self, path: str) -> EnzymeMLDocument:
"""
Reads EnzymeML document to an object layer EnzymeMLDocument class.
Args:
path (str): Path to .omex container or
folder destination for plain .xml
"""
if not path.endswith(".omex"):
raise TypeError(
f"File {os.path.basename(path)} is not a valid OMEX archive"
)
# Read omex archive
self.path = path
self.archive = CombineArchive()
self.archive.initializeFromArchive(self.path)
content = self.archive.extractEntryToString("./experiment.xml")
desc = self.archive.getMetadataForLocation("./experiment.xml")
# Get previous logs
log = self.archive.extractEntryToString("./history.log")
# Read experiment file (sbml)
reader = SBMLReader()
document = reader.readSBMLFromString(content)
document.getErrorLog().printErrors()
model = document.getModel()
# Initialize EnzymeMLDocument object
self.enzmldoc = EnzymeMLDocument(
name=model.getName(), level=model.getLevel(), version=model.getVersion()
)
# Add logs to the document
self.enzmldoc.log = log
# Fetch references
self._getRefs(model, self.enzmldoc)
# Fetch Creators
self._getCreators(omex_desc=desc, enzmldoc=self.enzmldoc)
# try:
# # TODO extract VCard
# model_hist = model.getModelHistory()
# enzmldoc.setCreated(
# model_hist.getCreatedDate().getDateAsString()
# )
# enzmldoc.setModified(
# model_hist.getModifiedDate().getDateAsString()
# )
# Fetch units
unitDict = self._getUnits(model)
self.enzmldoc._unit_dict = unitDict
# Fetch Vessel
vessel = self._getVessel(model, self.enzmldoc)
self.enzmldoc.vessel_dict = vessel
# Fetch Species
protein_dict, reactant_dict, complex_dict = self._getSpecies(
model, self.enzmldoc
)
self.enzmldoc.reactant_dict = reactant_dict
self.enzmldoc.protein_dict = protein_dict
self.enzmldoc.complex_dict = complex_dict
# fetch global parameters
self._getGlobalParameters(model, self.enzmldoc)
# fetch reaction
reaction_dict = self._getReactions(model, self.enzmldoc)
self.enzmldoc.reaction_dict = reaction_dict
# fetch Measurements
measurement_dict = self._getData(model, self.enzmldoc)
self.enzmldoc.measurement_dict = measurement_dict
# fetch added files
self._getFiles(self.enzmldoc)
del self.path
return self.enzmldoc
@staticmethod
def _sboterm_to_enum(sbo_term: int) -> Optional[SBOTerm]:
try:
sbo_string: str = libsbml.SBO_intToString(sbo_term)
if len(sbo_string) == 0:
return None
return SBOTerm(sbo_string)
except ValueError:
return None
def _getRefs(self, model, enzmldoc):
if len(model.getAnnotationString()) == 0:
return
root = ET.fromstring(model.getAnnotationString())[0]
for element in root:
if "doi" in element.tag:
enzmldoc.doi = element.text
elif "pubmedID" in element.tag:
enzmldoc.pubmedid = element.text
elif "url" in element.tag:
enzmldoc.url = element.text
def _getCreators(self, omex_desc, enzmldoc) -> None:
"""Fetches all creators from an Combine archive's metadata.
Args:
omex_desc (OMEX obj): Combine metadata description.
Returns:
List[Creator]: Fetched list of creator objects.
"""
# Get the number of creators to iterate
numCreators = omex_desc.getNumCreators()
for i in range(numCreators):
# Fetch creator information
creator = omex_desc.getCreator(i)
enzmldoc.addCreator(
Creator(
family_name=creator.getFamilyName(),
given_name=creator.getGivenName(),
mail=creator.getEmail(),
),
log=False,
)
def _getUnits(self, model: libsbml.Model) -> Dict[str, UnitDef]:
"""Fetches all the units present in the SBML model.^
Args:
model (libsbml.Model): The SBML model from which the units are fetched.
Returns:
[type]: [description]
"""
unitDict = {}
unitdef_list = model.getListOfUnitDefinitions()
for unit in unitdef_list:
# Get infos from the SBML model
name = unit.name
id = unit.id
meta_id = unit.meta_id
ontology = None # TODO get unit ontology
# Create unit definition
unitdef = UnitDef(name=name, id=id, ontology=ontology, meta_id=meta_id)
for baseunit in unit.getListOfUnits():
# Construct unit definition with base units
unitdef.addBaseUnit(
kind=baseunit.toXMLNode().getAttrValue("kind"),
exponent=baseunit.getExponentAsDouble(),
scale=baseunit.getScale(),
multiplier=baseunit.getMultiplier(),
)
# Finally add the unit definition
unitDict[id] = unitdef
return unitDict
def _getVessel(
self, model: libsbml.Model, enzmldoc: "EnzymeMLDocument"
) -> Dict[str, Vessel]:
"""Fetches all the vessels/compartments present in the SBML model.
Args:
model (libsbml.Model): The SBML model from which the vessels are fetched.
Returns:
Dict[str, Vessel]: Corresponding vessel dictionary that has been converted.
"""
vessel_dict = {}
compartments = model.getListOfCompartments()
for compartment in compartments:
name = compartment.getName()
id = compartment.getId()
# Set up dictionary for optional attributes
params = {}
if compartment.isSetVolume():
params["volume"] = compartment.getSize()
params["_unit_id"] = compartment.getUnits()
params["unit"] = enzmldoc.getUnitString(params["_unit_id"])
vessel = Vessel(name=name, id=id, **params)
vessel._unit_id = params.get("_unit_id")
vessel._enzmldoc = self.enzmldoc
vessel_dict[vessel.id] = vessel
return vessel_dict
def _getSpecies(
self, model: libsbml.Model, enzmldoc: "EnzymeMLDocument"
) -> Tuple[Dict[str, Protein], Dict[str, Reactant], Dict[str, Complex]]:
# initialize dictionaries and get species
protein_dict = {}
reactant_dict = {}
complex_dict = {}
species_list = model.getListOfSpecies()
for species in species_list:
# Check if init conc is given
init_conc = species.getInitialConcentration()
unit_id = species.getSubstanceUnits()
if repr(init_conc) == "nan":
# Handle not existent init concs
init_conc = None
if unit_id:
# Get unit string if given
unit = enzmldoc.getUnitString(unit_id)
else:
unit = None
# Get SBOTerm, but if there is none, give default
ontology = self._sboterm_to_enum(species.getSBOTerm())
# Parse annotations and construct a kwargs dictionary
param_dict = self._parseSpeciesAnnotation(species.getAnnotationString())
param_dict.update(
{
"id": species.getId(),
"meta_id": species.getMetaId(),
"vessel_id": species.getCompartment(),
"name": species.getName(),
"constant": species.getConstant(),
"ontology": ontology,
"init_conc": init_conc,
"_unit_id": unit_id,
"unit": unit,
# Some attributes need special care
"ecnumber": param_dict.get("e_cnumber"),
"uniprotid": param_dict.get("uniprot_id"),
"participants": param_dict.get("participant"),
}
)
# Get species factory from ontology
try:
# Current version uses SBOTerms to distinguish between entities
species_factory = species_factory_mapping(param_dict["ontology"])
except ValueError:
# Backwards compatibility to old documents that do not incorporate SBOTerms
if param_dict["id"].startswith("s"):
species_factory = species_factory_mapping(
SBOTerm.SMALL_MOLECULE.value
)
# Remove ontology to get the default
param_dict.pop("ontology")
elif param_dict["id"].startswith("p"):
species_factory = species_factory_mapping(SBOTerm.PROTEIN.value)
# Remove ontology to get the default
param_dict.pop("ontology")
else:
raise ValueError(
f"ID {param_dict['id']} is not supported. Please use either of these 'p|s|c'"
)
# Use factory to get the species class
species = species_factory.get_species(**param_dict)
species._enzmldoc = self.enzmldoc
if species_factory.enzymeml_part == "protein_dict":
protein_dict[species.id] = species
elif species_factory.enzymeml_part == "reactant_dict":
reactant_dict[species.id] = species
elif species_factory.enzymeml_part == "complex_dict":
complex_dict[species.id] = species
return protein_dict, reactant_dict, complex_dict
@staticmethod
def _parseSpeciesAnnotation(annotationString):
if len(annotationString) == 0:
return dict()
def camel_to_snake(name):
name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower()
speciesAnnot = ET.fromstring(annotationString)[0]
# Initialize annotation dictionary
param_dict = {}
for enzymeMLAnnot in speciesAnnot:
key = enzymeMLAnnot.tag.split("}")[-1]
key = camel_to_snake(key)
attribute = enzymeMLAnnot.text
if key in param_dict:
# Take care of list attributes
try:
param_dict[key].append(attribute)
except AttributeError:
param_dict[key] = [param_dict[key], attribute]
continue
param_dict[key] = attribute
return param_dict
def _getGlobalParameters(self, model: libsbml.Model, enzmldoc):
"""Fetches global parameters from the SBML model"""
parameters = model.getListOfParameters()
for parameter in parameters:
parameter = self._parse_parameter(parameter, enzmldoc)
parameter.is_global = True
parameter._enzmldoc = self.enzmldoc
enzmldoc.global_parameters[parameter.name] = parameter
def _getReactions(
self, model: libsbml.Model, enzmldoc: "EnzymeMLDocument"
) -> Dict[str, EnzymeReaction]:
# Get SBML list of reactions
reactionsList = model.getListOfReactions()
# Initialize reaction dictionary
reaction_dict = {}
# parse annotations and filter replicates
for reaction in reactionsList:
# Fetch conditions
if reaction.getAnnotationString():
reactionAnnot = ET.fromstring(reaction.getAnnotationString())[0]
conditions = self._parseConditions(reactionAnnot, enzmldoc)
else:
conditions = {}
# Fetch Elements in SpeciesReference
educts = self._getElements(
reaction.getListOfReactants(), ontology=SBOTerm.SUBSTRATE
)
products = self._getElements(
reaction.getListOfProducts(), ontology=SBOTerm.PRODUCT
)
modifiers = self._getElements(
reaction.getListOfModifiers(), modifiers=True, ontology=SBOTerm.CATALYST
)
# Get the ontology
ontology = self._sboterm_to_enum(reaction.getSBOTerm())
if ontology is None:
ontology = SBOTerm.BIOCHEMICAL_REACTION
# Create object
enzyme_reaction = EnzymeReaction(
id=reaction.id,
meta_id="META_" + reaction.id.upper(),
name=reaction.name,
reversible=reaction.reversible,
educts=educts,
products=products,
modifiers=modifiers,
ontology=ontology,
**conditions,
)
# Check for kinetic model
if reaction.getKineticLaw():
# Check if model exists
kinetic_law = reaction.getKineticLaw()
kinetic_model = self._getKineticModel(kinetic_law, enzmldoc)
enzyme_reaction.model = kinetic_model
# Add global parameters
for name, global_parameter in enzmldoc.global_parameters.items():
# Keep a reference to the global paremeter
if name in enzyme_reaction.model.equation:
enzyme_reaction.model.parameters.append(global_parameter)
# Add reaction to reaction_dict
enzyme_reaction._enzmldoc = self.enzmldoc
reaction_dict[enzyme_reaction.id] = enzyme_reaction
return reaction_dict
@staticmethod
def _parseConditions(
reactionAnnot: ET.Element, enzmldoc: "EnzymeMLDocument"
) -> Dict[str, Union[str, float]]:
"""Exracts the conditions present in the SBML reaction annotations.
Args:
reactionAnnot (ET.Element): The reaction annotation element.
enzmldoc (EnzymeMLDocument): The EnzymeMLDocument against which the data will be validated.
Returns:
Dict[str, Union[str, float]]: Mapping for the conditions.
"""
# Get the conditions element
conditions = reactionAnnot[0]
condition_dict = {}
for condition in conditions:
# Sort all the conditions
if "temperature" in condition.tag:
# Get temperature conditions
condition_dict["temperature"] = float(condition.attrib["value"])
# Parse unit ID to Unit string
condition_dict["_temperature_unit_id"] = condition.attrib["unit"]
condition_dict["temperature_unit"] = enzmldoc.getUnitString(
condition_dict["_temperature_unit_id"]
)
elif "ph" in condition.tag:
# Get the pH value
condition_dict["ph"] = float(condition.attrib["value"])
return condition_dict
def _getElements(
self,
species_refs: List[libsbml.SpeciesReference],
ontology: SBOTerm,
modifiers: bool = False,
) -> List[ReactionElement]:
"""Extracts the speciesReference objects from the associated list and converts them to ReactionElements
Args:
species_refs (List[libsbml.SpeciesReference]): The species refrences for the reaction <-> Chemical reaction elements.
modifiers (bool, optional): Used to override missing stoichiometry and constant for modifiers. Defaults to False.
Returns:
List[ReactionElement]: The list of reaction elements.
"""
reaction_elements = []
for species_ref in species_refs:
species_id = species_ref.getSpecies()
stoichiometry = 1.0 if modifiers else species_ref.getStoichiometry()
constant = True if modifiers else species_ref.getConstant()
sbo_term = libsbml.SBO_intToString(species_ref.getSBOTerm())
if sbo_term:
ontology = SBOTerm(sbo_term)
reaction_elements.append(
ReactionElement(
species_id=species_id,
stoichiometry=stoichiometry,
constant=constant,
ontology=ontology,
)
)
return reaction_elements
def _getKineticModel(
self, kineticLaw: libsbml.KineticLaw, enzmldoc: "EnzymeMLDocument"
) -> KineticModel:
"""Extracts a kinetic rate law from the SBML data model.
Args:
kineticLaw (libsbml.KineticLaw): The kinetic law to be extracted.
enzmldoc (EnzymeMLDocument): The EnzymeMLDocument to which the kinetic law will be added.
Returns:
KineticModel: Teh resulting kinetic model.
"""
# Extract metadata
name = kineticLaw.getName()
equation = kineticLaw.getFormula()
ontology = self._sboterm_to_enum(kineticLaw.getSBOTerm())
# Get local parameters
parameters = []
for local_param in kineticLaw.getListOfLocalParameters():
parameter = self._parse_parameter(local_param, enzmldoc)
if parameter.name in enzmldoc.global_parameters:
parameters.append(enzmldoc.global_parameters[parameter.name])
else:
parameters.append(parameter)
return KineticModel(
name=name, equation=equation, parameters=parameters, ontology=ontology
)
def _parse_parameter(self, parameter, enzmldoc):
"""Parses a paramater and converts it to a KineticParameter instance"""
# TODO refactor here
value = parameter.getValue()
unit_id = parameter.getUnits()
annotation = parameter.getAnnotationString()
param_dict = self._parseSpeciesAnnotation(annotation)
if unit_id:
param_dict["unit"] = enzmldoc.getUnitString(unit_id)
if parameter.__class__.__name__ == "LocalParameter":
constant = False
else:
constant = parameter.getConstant()
if repr(parameter.getValue()) == "nan":
value = None
nu_param = KineticParameter(
name=parameter.getId(),
value=value,
unit=param_dict.get("unit"),
ontology=self._sboterm_to_enum(parameter.getSBOTerm()),
initial_value=param_dict.get("initial_value"),
upper=param_dict.get("upper_bound"),
lower=param_dict.get("lower_bound"),
stdev=param_dict.get("stdev"),
constant=constant,
)
nu_param._enzmldoc = self.enzmldoc
if unit_id:
nu_param._unit_id = parameter.getUnits()
return nu_param
def _getData(
self, model: libsbml.Model, enzmldoc: "EnzymeMLDocument"
) -> Dict[str, Measurement]:
"""Retrieves all available measurements found in the EnzymeML document.
Args:
model (libsbml.Model): The SBML model from which the measurements are feteched,
Returns:
Dict[str, Measurement]: Mapping from measurement ID to the associated object.
"""
# Parse EnzymeML:format annotation
reactions = model.getListOfReactions()
annotation_string = reactions.getAnnotationString()
# Guard clause for when there is no data
if annotation_string == "":
return {}
# Parse annotation to an ElementTree
data_annotation = ET.fromstring(annotation_string)[0]
# Fetch measurements
measurement_dict, measurement_files = self._parseListOfMeasurements(
data_annotation, enzmldoc=enzmldoc
)
# Iterate over measurements and assign replicates
for measurement_id, measurement_file in measurement_files.items():
# Fetch list of files
files = self._parseListOfFiles(data_annotation)
# Fetch formats
formats = self._parseListOfFormats(data_annotation)
# Get file content
fileInfo = files[measurement_file]
file_content = self.archive.extractEntryToString(fileInfo["file"])
csvFile = pd.read_csv(StringIO(file_content), header=None)
# Get format data and extract time column
measurement_format = formats[fileInfo["format"]]
time, time_unit_id = [
(csvFile.iloc[:, int(column["index"])].tolist(), column["unit"])
for column in measurement_format
if column["type"] == "time"
][0]
measurement_dict[measurement_id]._global_time_unit_id = time_unit_id
# Create replicate objects
for format in measurement_format:
if format["type"] != "time":
# Get time course data
data = csvFile.iloc[:, int(format["index"])].tolist()
reactant_id = format["species"]
replicate_id = format["replica"]
data_type = DataTypes(format["type"])
data_unit_id = format["unit"]
is_calculated = format["isCalculated"]
replicate = Replicate(
id=replicate_id,
species_id=reactant_id,
data_type=data_type,
measurement_id=measurement_id,
data_unit=enzmldoc._unit_dict[data_unit_id].name,
time_unit=enzmldoc._unit_dict[time_unit_id].name,
data=data,
time=time,
is_calculated=is_calculated,
)
replicate._data_unit_id = data_unit_id
replicate._time_unit_id = time_unit_id
replicate._enzmldoc = self.enzmldoc
measurement_dict[measurement_id].addReplicates(
replicate, log=False, enzmldoc=enzmldoc
)
return measurement_dict
def _parseListOfFiles(
self, data_annotation: ET.Element
) -> Dict[str, Dict[str, str]]:
"""Extracts the list of files that are present in the annotation enzymeml:files.
Args:
data_annotation (ET.Element): ElementTree object containing the enzymeml:files annotation.
Returns:
Dict[str, Dict[str, str]]: Dictionary of all files present in the annotation.
"""
return {
file.attrib["id"]: {
"file": file.attrib["file"],
"format": file.attrib["format"],
"id": file.attrib["id"],
}
for file in self._get_element(data_annotation, "files")
}
def _parseListOfFormats(self, data_annotation: ET.Element) -> Dict[str, List[dict]]:
"""Extracts the list of formats that areb present in the annotation enzymeml:formats.
Args:
data_annotation (ET.Element): ElementTree object containing the enzymeml:files annotation.
Returns:
Dict[str, List[dict]]: Dictionary of all the formats present in the annotation.
"""
return {
format.attrib["id"]: [column.attrib for column in format]
for format in self._get_element(data_annotation, "formats")
}
def _parseListOfMeasurements(
self, data_annotation: ET.Element, enzmldoc: "EnzymeMLDocument"
) -> Tuple[Dict[str, Measurement], dict]:
"""Extracts teh list of measurements that are present in the annotation enzymeml:measurements.
Args:
data_annotation (ET.Element): ElementTree object containing the enzymeml:measurements annotation.
Returns:
tuple[Dict[str, dict], Dict[str, Measurement]]: Two dictionaries returning the measurement objects and files.
"""
measurements = self._get_element(data_annotation, "listOfMeasurements")
if measurements is None:
# There was a typo and it should be catched here
measurements = self._get_element(data_annotation, "listOfMasurements")
measurement_files = {
measurement.attrib["id"]: measurement.attrib["file"]
for measurement in measurements
if measurement.attrib.get("file")
}
measurement_dict = {
measurement.attrib["id"]: self._parseMeasurement(
measurement, enzmldoc=enzmldoc
)
for measurement in measurements
}
return (measurement_dict, measurement_files)
def _parseMeasurement(
self, measurement: ET.Element, enzmldoc: "EnzymeMLDocument"
) -> Measurement:
"""Extracts individual initial concentrations of a measurement.
Args:
measurement (ET.Element): Measurement XML information
Returns:
Measurement: Initialized measurement object.
"""
# Get conditions (temp, ph)
temperature = measurement.attrib.get("temperature_value")
temperature_unit = measurement.attrib.get("temperature_unit")
ph = measurement.attrib.get("ph")
# Get the unit string of temp if given
if temperature_unit:
temperature_unit = enzmldoc.getUnitString(temperature_unit)
# initialize Measurement object
measurement_object = Measurement(
name=measurement.attrib["name"],
temperature=temperature,
temperature_unit=temperature_unit,
ph=ph,
)
measurement_object.id = measurement.attrib["id"]
temperature_unit_id = measurement.attrib.get("temperature_unit")
measurement_object._temperature_unit_id = temperature_unit_id
measurement_object._enzmldoc = self.enzmldoc
for init_conc_element in measurement:
params, unit_id = self._parse_init_conc_element(init_conc_element, enzmldoc)
measurement_object.addData(**params, log=False)
if params["reactant_id"]:
meas_data = measurement_object.getReactant(params["reactant_id"])
elif params["protein_id"]:
meas_data = measurement_object.getProtein(params["protein_id"])
else:
raise ValueError("Neither 'reactant_id' nor 'protein_id' are defined")
meas_data._unit_id = unit_id
meas_data._enzmldoc = self.enzmldoc
return measurement_object
@staticmethod
def _parse_init_conc_element(element: ET.Element, enzmldoc):
"""Parses initial concentration data of a measurement.
Args:
element (ET.Element): Element containing information about the initial concentration and species.
Raises:
KeyError: If there is neither a protein nor reactant ID.
"""
value = float(element.attrib["value"])
# Convert the unit ID to the corresponding SI string
unit_id = element.attrib["unit"]
unit = enzmldoc._unit_dict[unit_id].name
reactant_id = None
protein_id = None
if "reactant" in element.attrib.keys():
reactant_id = element.attrib["reactant"]
elif "protein" in element.attrib.keys():
protein_id = element.attrib["protein"]
else:
raise KeyError("Neither reactant or protein ID defined.")
return {
"init_conc": float(value),
"unit": unit,
"reactant_id": reactant_id,
"protein_id": protein_id,
}, unit_id
@staticmethod
def _get_element(tree: ET.Element, name: str):
for element in tree.iter("*"):
if name.lower() in element.tag.lower():
return element
return None
def _getFiles(self, enzmldoc):
"""Extracts all added files fro the archive.
Args:
archive (CombineArchive): The OMEX archive to extract files from.
enzmldoc (EnzymeMLDocument): The EnzymeMLDocument to add files to.
"""
# Iterate over enries and extract files
for file_location in self.archive.getAllLocations():
file_location = str(file_location)
if "./files/" in file_location:
# Convert raw file to file_handle
file_handle = tempfile.NamedTemporaryFile()
file_handle.name = os.path.basename(file_location)
# Write file to temporary file
path = f"./{file_handle.name}"
self.archive.extractEntry(file_location, path)
with open(path, "rb") as f:
file_handle.write(f.read())
file_handle.seek(0)
# Add the file to the EnzymeMLDocument
enzmldoc.addFile(file_handle=file_handle)
# Remove the temporary file
os.remove(path)