Source code for pyenzyme.enzymeml.core.measurement

# File: measurement.py
# Project: core
# Author: Jan Range
# License: BSD-2 clause
# Copyright (c) 2022 Institute of Biochemistry and Technical Biochemistry Stuttgart

import copy
import logging
import pandas as pd

from typing import List, Dict, Tuple, Optional, TYPE_CHECKING, Union
from dataclasses import dataclass
from pydantic import validate_arguments, Field, PrivateAttr, validator

from pyenzyme.enzymeml.core.enzymemlbase import EnzymeMLBase
from pyenzyme.enzymeml.core.measurementData import MeasurementData
from pyenzyme.enzymeml.core.replicate import Replicate
from pyenzyme.enzymeml.core.exceptions import SpeciesNotFoundError
from pyenzyme.utils.log import log_object
from pyenzyme.enzymeml.core.utils import type_checking, deprecated_getter

# Select the decorator applied to the Measurement class below: static type
# checkers see the standard-library ``dataclass``, while at runtime the
# project's ``type_checking`` helper is used instead.
# NOTE(review): presumably this lets static analysers check constructor
# arguments against a dataclass-style ``__init__`` — confirm against the
# implementation of ``type_checking``.
if TYPE_CHECKING:  # pragma: no cover
    static_check_init_args = dataclass
else:
    static_check_init_args = type_checking

# Initialize the logger (module-level; uses the "pyenzyme" logger name)
logger = logging.getLogger("pyenzyme")


@static_check_init_args
class Measurement(EnzymeMLBase):
    """Describes one measurement and the species data recorded in it.

    A measurement stores the reaction conditions (temperature, pH), a global
    time axis and, per species, a ``MeasurementData`` object. The species are
    grouped in ``species_dict`` under the keys ``"proteins"`` and
    ``"reactants"``.
    """

    name: str = Field(
        ...,
        description="Name of the measurement",
    )

    temperature: Optional[float] = Field(
        None,
        description="Numeric value of the temperature of the reaction.",
        template_alias="Temperature value",
    )

    temperature_unit: Optional[str] = Field(
        None,
        description="Unit of the temperature of the reaction.",
        regex=r"kelvin|Kelvin|k|K|celsius|Celsius|C|c",
    )

    # NOTE(review): ``inclusiveMinimum``/``inclusiveMaximum`` look like
    # schema-style keywords rather than pydantic's ``ge``/``le`` constraints;
    # confirm whether the 0-14 range is actually enforced on assignment.
    ph: Optional[float] = Field(
        None,
        description="PH value of the reaction.",
        inclusiveMinimum=0,
        inclusiveMaximum=14,
    )

    species_dict: Dict[str, Dict[str, MeasurementData]] = Field(
        {"proteins": {}, "reactants": {}},
        description="Species of the measurement.",
    )

    global_time: List[float] = Field(
        default_factory=list,
        description="Global time of the measurement all replicates agree on.",
    )

    global_time_unit: Optional[str] = Field(
        None,
        description="Unit of the global time.",
    )

    id: Optional[str] = Field(
        None, description="Unique identifier of the measurement.", regex=r"m[\d]+"
    )

    uri: Optional[str] = Field(
        None,
        description="URI of the reaction.",
    )

    creator_id: Optional[str] = Field(
        None,
        description="Unique identifier of the author.",
    )

    # * Private attributes
    _temperature_unit_id: str = PrivateAttr(None)
    _global_time_unit_id: str = PrivateAttr(None)
    _enzmldoc = PrivateAttr(default=None)

    # ! Validators
    @validator("temperature_unit")
    def convert_temperature_unit(cls, unit, values):
        """Converts celsius to kelvin due to SBML limitations.

        Args:
            unit (Optional[str]): The temperature unit that was passed in.
            values (dict): Previously validated fields of the model; the
                ``temperature`` entry is shifted by 273.15 in place when the
                unit is Celsius.

        Returns:
            Optional[str]: ``"K"`` if a Celsius temperature was converted,
            otherwise the unit as given.

        Raises:
            ValueError: If a Celsius unit is given while ``temperature`` is
                ``None``, since there is no value to convert.
        """

        if not unit or unit.lower() not in ("celsius", "c"):
            return unit

        if "temperature" not in values:
            # The temperature itself did not pass validation. Leave the unit
            # untouched so that only the real temperature error is reported
            # instead of an unrelated KeyError raised from here.
            return unit

        temperature = values["temperature"]
        if temperature is None:
            raise ValueError(
                f"Temperature unit '{unit}' was given without a temperature "
                "value, hence it can not be converted to Kelvin."
            )

        values["temperature"] = temperature + 273.15
        return "K"

    # ! Utility methods
    def __repr__(self):
        """Returns the measurement scheme as the string representation."""
        return self.printMeasurementScheme(stdout=False)

    def printMeasurementScheme(
        self, species_type: str = "all", stdout: bool = True
    ) -> Optional[str]:
        """Prints the scheme of the measurement and as such an overview of what has been done.

        Args:
            species_type (str, optional): Specifies whether only "reactants"/"proteins" should be displayed or all of them. Defaults to "all".
            stdout (bool, optional): If True the scheme is printed and nothing is returned, otherwise the scheme is returned as a string. Defaults to True.

        Returns:
            Optional[str]: The scheme if ``stdout`` is False, otherwise None.

        Raises:
            ValueError: If ``species_type`` is none of "reactants", "proteins" or "all".
        """

        # Get all measurement data objects
        reactants = self.getReactants()
        proteins = self.getProteins()

        if species_type == "reactants":
            species = list(reactants.values())
        elif species_type == "proteins":
            species = list(proteins.values())
        elif species_type == "all":
            species = list(reactants.values()) + list(proteins.values())
        else:
            raise ValueError(
                f"Species type of {species_type} is not supported. Please enter use one of the following: 'reactants', 'proteins' or 'all'."
            )

        # Assemble the header followed by one line per species
        lines = [f">>> Measurement {self.id}: {self.name}"]
        lines.extend(
            f" {meas_data.get_id()} | initial conc: {meas_data.init_conc} {meas_data.unit} \t| #replicates: {len(meas_data.replicates)}"
            for meas_data in species
        )
        scheme = "\n".join(lines)

        if stdout:
            print(scheme)
            return None

        return scheme

    def exportData(
        self, species_ids: Union[str, List[str]] = "all"
    ) -> Dict[str, Dict[str, Union[Dict[str, Tuple[float, str]], pd.DataFrame]]]:
        """Returns data stored in the measurement object as DataFrames nested in dictionaries.

        Args:
            species_ids (Union[str, List[str]]): List of species IDs to extract data from. Defaults to 'all'.

        Returns:
            measurements (dict): Follows the hierarchy Proteins/Reactants > { initConc, data }
        """

        # Combine Replicate objects for each species type
        proteins = self._combineReplicates(
            measurement_species=self.species_dict["proteins"],
            species_ids=species_ids,
        )
        reactants = self._combineReplicates(
            measurement_species=self.species_dict["reactants"],
            species_ids=species_ids,
        )

        return {"proteins": proteins, "reactants": reactants}

    def _combineReplicates(
        self,
        measurement_species: Dict[str, MeasurementData],
        species_ids: Union[str, List[str]] = "all",
    ) -> Dict[str, Union[Dict[str, Tuple[float, str]], pd.DataFrame]]:
        """Combines replicates of a certain species to a dataframe.

        Args:
            measurement_species (Dict[str, MeasurementData]): One species group ("proteins" or "reactants") of the species_dict.
            species_ids (Union[str, List[str]]): IDs of the species to include. Defaults to 'all'.

        Returns:
            Dict[str, Any]: "data" holds a DataFrame with one column per species plus a "time" column (empty if no species data was found), "initConc" maps each species ID to a tuple of initial concentration and unit.
        """

        if isinstance(species_ids, str):
            species_ids = [species_ids]

        # Loop-invariant: does the caller want every species?
        select_all = species_ids == ["all"]

        columns = {}
        initial_concentration = {}
        num_replicates = 0

        # Iterate over measurementData to fill columns
        for species_id, data in measurement_species.items():
            if not (select_all or species_id in species_ids):
                continue

            # Fetch initial concentration
            initial_concentration[species_id] = (data.init_conc, data.unit)

            # Keep track of the largest number of replicates
            num_replicates = max(num_replicates, len(data.replicates))

            # Fetch replicate data, replicates are appended one after another
            for replicate in data.replicates:
                replicate_data = copy.deepcopy(replicate.data)

                if columns.get(species_id):
                    # For multiple replicates
                    columns[species_id] += replicate_data
                else:
                    columns[species_id] = replicate_data

        # Add global time to columns according to the number of replicates
        # NOTE(review): this assumes all selected species share the same
        # number of replicates, each matching the global time in length;
        # otherwise the columns differ in length — confirm with callers.
        columns["time"] = self.global_time * num_replicates

        # "time" is always present, thus more than one column means data
        return {
            "data": pd.DataFrame(columns) if len(columns) > 1 else pd.DataFrame(),
            "initConc": initial_concentration,
        }

    @validate_arguments
    def addReplicates(
        self, replicates: Union[List[Replicate], Replicate], enzmldoc, log: bool = True
    ) -> None:
        """Adds replicates to the corresponding measurementData objects.

        This method is meant to be called if the measurement metadata of a
        reaction/species has already been done and replicate data has to be
        added afterwards. If not, use addData instead to introduce the
        species metadata.

        Args:
            replicates (Union[List[Replicate], Replicate]): Objects describing time course data.
            enzmldoc: Document used to convert the data and time units of each replicate via ``_convertToUnitDef``.
            log (bool, optional): Whether the added replicates should be logged. Defaults to True.

        Raises:
            KeyError: If the species of a replicate is not part of the measurement yet.
        """

        # Check if just a single Replicate has been handed
        if isinstance(replicates, Replicate):
            replicates = [replicates]

        for replicate in replicates:

            # Check for the species type
            species_id = replicate.species_id
            speciesType = "reactants" if species_id[0] == "s" else "proteins"
            speciesData = self.species_dict[speciesType]

            # Only the lookup is guarded, such that KeyErrors raised by the
            # unit conversion or by adding the replicate are not mistaken
            # for a missing species.
            try:
                data = speciesData[species_id]
            except KeyError:
                raise KeyError(
                    f"{speciesType[0:-1]} {species_id} is not part of the measurement yet. If a {speciesType[0:-1]} hasnt been yet defined in a measurement object, use the addData method to define metadata first-hand. You can add the replicates in the same function call then."
                )

            replicate._data_unit_id = enzmldoc._convertToUnitDef(replicate.data_unit)
            replicate._time_unit_id = enzmldoc._convertToUnitDef(replicate.time_unit)

            data.addReplicate(replicate)

            if len(self.global_time) == 0:
                # Add global time if this is the first replicate to be added
                self.global_time = replicate.time
                self.global_time_unit = replicate.time_unit
                self._global_time_unit_id = replicate._time_unit_id

            # Log Replicate creation
            if log:
                log_object(logger, replicate)
                logger.debug(
                    f"Added {type(replicate).__name__} '{replicate.id}' to data '{data.get_id()}' of measurement '{self.name}'"
                )

    @validate_arguments
    def addData(
        self,
        unit: str,
        init_conc: float = 0.0,
        reactant_id: Optional[str] = None,
        protein_id: Optional[str] = None,
        replicates: Optional[List[Replicate]] = None,
        log: bool = True,
    ) -> None:
        """Adds data to the measurement object.

        Args:
            unit (str): The SI unit of the measurement.
            init_conc (float): Corresponding initial concentration of species. Defaults to 0.0.
            reactant_id (Optional[str], optional): Identifier of the reactant that was measured. Defaults to None.
            protein_id (Optional[str], optional): Identifier of the protein that was measured. Defaults to None.
            replicates (Optional[List[Replicate]], optional): List of replicates that were measured. Defaults to None, which is treated as an empty list.
            log (bool, optional): Whether the added data should be logged. Defaults to True.

        Raises:
            ValueError: If neither a reactant nor a protein ID is given.
        """

        # A fresh list per call avoids sharing one default list object
        if replicates is None:
            replicates = []

        self._appendReactantData(
            reactant_id=reactant_id,
            protein_id=protein_id,
            init_conc=init_conc,
            unit=unit,
            replicates=replicates,
            log=log,
        )

    def _appendReactantData(
        self,
        reactant_id: Optional[str],
        protein_id: Optional[str],
        init_conc: float,
        unit: str,
        replicates: List[Replicate],
        log: bool = True,
    ) -> None:
        """Creates a MeasurementData object and stores it in the species_dict.

        If both IDs are given, the data is stored as a reactant.

        Raises:
            ValueError: If neither a reactant nor a protein ID is given.
        """

        # Create measurement data class before sorting
        measData = MeasurementData(
            reactant_id=reactant_id,
            protein_id=protein_id,
            init_conc=init_conc,
            unit=unit,
            replicates=replicates,
            measurement_id=self.id,
        )

        if reactant_id:
            self.species_dict["reactants"][reactant_id] = measData
        elif protein_id:
            self.species_dict["proteins"][protein_id] = measData
        else:
            raise ValueError(
                "Please enter a reactant or protein ID to add measurement data"
            )

        # Log the new object
        if log:
            log_object(logger, measData)
            logger.debug(
                f"Added {type(measData).__name__} '{measData.get_id()}' to measurement '{self.name}'"
            )

    def updateReplicates(self, replicates: List[Replicate]) -> None:
        """Assigns the name of this measurement to each given replicate.

        Args:
            replicates (List[Replicate]): Replicates to update in place.
        """

        for replicate in replicates:
            # Set the measurement name for the replicate
            # NOTE(review): this uses self.name, while _setReplicateMeasIDs
            # uses self.id for measurement_id — confirm that this is intended.
            replicate.measurement_id = self.name

    def _setReplicateMeasIDs(self) -> None:
        """Sets the measurement ID on all MeasurementData objects of the measurement."""

        for species_type in ("proteins", "reactants"):
            for measData in self.species_dict[species_type].values():
                measData.measurement_id = self.id

    def unifyUnits(self, kind: str, scale: int, enzmldoc) -> None:
        """Rescales all replicates and measurements to match the scale of a unit kind.

        Args:
            kind (str): The unit kind from which to rescale. Currently supported: 'mole', 'gram', 'litre'.
            scale (int): Decade scale to which the values will be rescaled. Has to be a multiple of 3, an absolute scale of 1 is accepted too.
            enzmldoc (EnzymeMLDocument): The EnzymeMLDocument to which the new unit will be added.

        Raises:
            ValueError: If the kind is not supported or the scale is invalid.
        """

        if kind not in ["mole", "gram", "litre"]:
            raise ValueError(
                f"Kind {kind} is not supported. Please use 'mole', 'gram', or 'litre'"
            )

        # An absolute scale of 1 is the only accepted non-multiple of 3
        if abs(scale) % 3 > 0 and abs(scale) != 1:
            raise ValueError(
                f"Scale {scale} is not a multiple of 3. Please make sure the scale is a multiple of 3."
            )

        for measurement_data in {**self.getProteins(), **self.getReactants()}.values():
            measurement_data.unifyUnits(kind=kind, scale=scale, enzmldoc=enzmldoc)

    def _has_replicates(self) -> bool:
        """Checks whether replicates are present in the measurement.

        Returns:
            bool: Returns True if there are any replicate otherwise False.
        """

        return any(len(obj.replicates) > 0 for obj in self._getAllSpecies().values())

    # ! Getters
    def temperature_unitdef(self):
        """Returns the appropriate unitdef if an enzmldoc is given"""

        if not self._enzmldoc:
            return None

        return self._enzmldoc._unit_dict[self._temperature_unit_id]

    @validate_arguments
    def getReactant(self, reactant_id: str) -> MeasurementData:
        """Returns a single MeasurementData object for the given reactant_id.

        Args:
            reactant_id (String): Unique identifier of the reactant

        Returns:
            MeasurementData: Object representing the data and initial concentration

        Raises:
            SpeciesNotFoundError: If the ID is not part of the measurement.
        """

        return self._getSpecies(reactant_id)

    def getProtein(self, protein_id: str) -> MeasurementData:
        """Returns a single MeasurementData object for the given protein_id.

        Args:
            protein_id (String): Unique identifier of the protein

        Returns:
            MeasurementData: Object representing the data and initial concentration

        Raises:
            SpeciesNotFoundError: If the ID is not part of the measurement.
        """

        return self._getSpecies(protein_id)

    def getReactants(self) -> Dict[str, MeasurementData]:
        """Returns a dict of all participating reactants in the measurement.

        Returns:
            dict: Dict of MeasurementData objects representing data
        """

        return self.species_dict["reactants"]

    def getProteins(self) -> Dict[str, MeasurementData]:
        """Returns a dict of all participating proteins in the measurement.

        Returns:
            dict: Dict of MeasurementData objects representing data
        """

        return self.species_dict["proteins"]

    def _getAllSpecies(self):
        """Returns proteins and reactants merged into a single new dict.

        On identical keys the reactant entry takes precedence.
        """

        return {**self.species_dict["proteins"], **self.species_dict["reactants"]}

    @validate_arguments
    def _getSpecies(self, species_id: str) -> MeasurementData:
        """Returns the MeasurementData object of a protein or reactant.

        Raises:
            SpeciesNotFoundError: If the ID is not part of the measurement.
        """

        all_species = self._getAllSpecies()

        try:
            return all_species[species_id]
        except KeyError:
            raise SpeciesNotFoundError(
                species_id=species_id, enzymeml_part="Measurement"
            )

    @deprecated_getter("id")
    def getId(self):
        """Returns the ``id`` attribute (getter marked as deprecated)."""
        return self.id

    @deprecated_getter("global_time_unit")
    def getGlobalTimeUnit(self):
        """Returns the ``global_time_unit`` attribute (getter marked as deprecated)."""
        return self.global_time_unit

    @deprecated_getter("global_time")
    def getGlobalTime(self):
        """Returns the ``global_time`` attribute (getter marked as deprecated)."""
        return self.global_time

    @deprecated_getter("name")
    def getName(self):
        """Returns the ``name`` attribute (getter marked as deprecated)."""
        return self.name

    @deprecated_getter("species_dict")
    def getSpeciesDict(self):
        """Returns the ``species_dict`` attribute (getter marked as deprecated)."""
        return self.species_dict