# Source code for pyenzyme.enzymeml.core.measurement

# File: measurement.py
# Project: core
# Author: Jan Range
# License: BSD-2 clause
# Copyright (c) 2022 Institute of Biochemistry and Technical Biochemistry Stuttgart

import copy
import logging
import pandas as pd

from typing import List, Dict, Tuple, Optional, TYPE_CHECKING, Union
from dataclasses import dataclass
from pydantic import validate_arguments, Field, PrivateAttr, validator

from pyenzyme.enzymeml.core.enzymemlbase import EnzymeMLBase
from pyenzyme.enzymeml.core.measurementData import MeasurementData
from pyenzyme.enzymeml.core.replicate import Replicate
from pyenzyme.enzymeml.core.exceptions import SpeciesNotFoundError
from pyenzyme.utils.log import log_object
from pyenzyme.enzymeml.core.utils import type_checking, deprecated_getter

# Pick the class decorator applied to the model below: static type checkers
# (TYPE_CHECKING is True) see the stdlib ``dataclass``, while at runtime the
# project's own ``type_checking`` decorator is used instead.
# NOTE(review): presumably this lets static analysers infer the constructor
# arguments of the model — confirm against pyenzyme.enzymeml.core.utils.
if TYPE_CHECKING:  # pragma: no cover
    static_check_init_args = dataclass
else:
    static_check_init_args = type_checking

# Initialize the logger
# All objects in this module log to the shared "pyenzyme" logger.
logger = logging.getLogger("pyenzyme")


@static_check_init_args
class Measurement(EnzymeMLBase):
    # Model of a single measurement: its conditions (temperature, pH), the
    # global time axis and, per species, a MeasurementData object stored in
    # ``species_dict`` under the keys "proteins" and "reactants".
    #
    # (Documented with comments rather than a class docstring on purpose: a
    # docstring would show up as the description in the generated model schema.)

    name: str = Field(
        ...,
        description="Name of the measurement",
    )

    temperature: Optional[float] = Field(
        None,
        description="Numeric value of the temperature of the reaction.",
        template_alias="Temperature value",
    )

    temperature_unit: Optional[str] = Field(
        None,
        description="Unit of the temperature of the reaction.",
        regex=r"kelvin|Kelvin|k|K|celsius|Celsius|C|c",
    )

    # NOTE(review): ``inclusiveMinimum``/``inclusiveMaximum`` are not pydantic
    # constraint keywords (those would be ``ge``/``le``); they are presumably
    # only carried along as extra field metadata, so the 0-14 range is likely
    # not enforced at validation time — confirm whether that is intended.
    ph: Optional[float] = Field(
        None,
        description="PH value of the reaction.",
        inclusiveMinimum=0,
        inclusiveMaximum=14,
    )

    species_dict: Dict[str, Dict[str, MeasurementData]] = Field(
        {"proteins": {}, "reactants": {}},
        description="Species of the measurement.",
    )

    global_time: List[float] = Field(
        default_factory=list,
        description="Global time of the measurement all replicates agree on.",
    )

    global_time_unit: Optional[str] = Field(
        None,
        description="Unit of the global time.",
    )

    id: Optional[str] = Field(
        None, description="Unique identifier of the measurement.", regex=r"m[\d]+"
    )

    uri: Optional[str] = Field(
        None,
        description="URI of the reaction.",
    )

    creator_id: Optional[str] = Field(
        None,
        description="Unique identifier of the author.",
    )

    # * Private attributes
    _temperature_unit_id: str = PrivateAttr(None)
    _global_time_unit_id: str = PrivateAttr(None)
    _enzmldoc = PrivateAttr(default=None)

    # ! Validators
    @validator("temperature_unit")
    def convert_temperature_unit(cls, unit, values):
        """Converts celsius to kelvin due to SBML limitations.

        When the unit is Celsius ("celsius"/"c", case-insensitive) the already
        validated ``temperature`` in ``values`` is shifted by 273.15 and the
        unit is replaced by "K". Any other unit is returned unchanged.

        Raises:
            ValueError: If a Celsius unit is given but no valid temperature
                value is available to convert.
        """

        if not unit or unit.lower() not in ("celsius", "c"):
            return unit

        # "temperature" is absent from ``values`` when it failed validation and
        # None when it was not given; neither can be converted.
        temperature = values.get("temperature")
        if temperature is None:
            raise ValueError(
                f"Cannot convert temperature unit '{unit}' to Kelvin because no valid temperature value was given."
            )

        values["temperature"] = temperature + 273.15
        return "K"

    # ! Utility methods
    def __repr__(self):
        """Returns the measurement scheme as the string representation."""
        return self.printMeasurementScheme(stdout=False)

    def printMeasurementScheme(
        self, species_type: str = "all", stdout: bool = True
    ) -> Optional[str]:
        """Prints the scheme of the measurement and as such an overview of what has been done.

        Args:
            species_type (str, optional): Specifies whether only "reactants"/"proteins" should be displayed or all of them. Defaults to "all".
            stdout (bool, optional): If True the scheme is printed and None is returned, otherwise the scheme is returned as a string. Defaults to True.

        Returns:
            Optional[str]: The scheme if ``stdout`` is False, otherwise None.

        Raises:
            ValueError: If ``species_type`` is none of "reactants", "proteins" or "all".
        """

        # Get all measurement data objects
        reactants = self.getReactants()
        proteins = self.getProteins()

        if species_type == "reactants":
            species = list(reactants.values())
        elif species_type == "proteins":
            species = list(proteins.values())
        elif species_type == "all":
            species = list(reactants.values()) + list(proteins.values())
        else:
            raise ValueError(
                f"Species type of {species_type} is not supported. Please enter use one of the following: 'reactants', 'proteins' or 'all'."
            )

        # Header line followed by one line per species
        lines = [f">>> Measurement {self.id}: {self.name}"]
        lines.extend(
            f"    {meas_data.get_id()} | initial conc: {meas_data.init_conc} {meas_data.unit} \t| #replicates: {len(meas_data.replicates)}"
            for meas_data in species
        )
        output = "\n".join(lines)

        if stdout:
            print(output)
            return None

        return output

    def exportData(
        self, species_ids: Union[str, List[str]] = "all"
    ) -> Dict[str, Dict[str, Union[Dict[str, Tuple[float, str]], pd.DataFrame]]]:
        """Returns the data stored in the measurement as DataFrames nested in dictionaries.

        Args:
            species_ids (Union[str, List[str]]): Species ID or list of species IDs to extract data from. Defaults to 'all'.

        Returns:
            dict: Has the keys "proteins" and "reactants", each holding a dict with the keys "data" (DataFrame) and "initConc" (initial concentrations).
        """

        # Combine Replicate objects separately for proteins and reactants
        proteins = self._combineReplicates(
            measurement_species=self.species_dict["proteins"], species_ids=species_ids
        )
        reactants = self._combineReplicates(
            measurement_species=self.species_dict["reactants"], species_ids=species_ids
        )

        return {"proteins": proteins, "reactants": reactants}

    def _combineReplicates(
        self,
        measurement_species: Dict[str, MeasurementData],
        species_ids: Union[str, List[str]] = "all",
    ) -> Dict[str, Union[Dict[str, Tuple[float, str]], pd.DataFrame]]:
        """Combines replicates of the given species to a dataframe.

        Args:
            measurement_species (Dict[str, MeasurementData]): One of the species groups from ``species_dict``.
            species_ids (Union[str, List[str]]): Species ID(s) to include; "all" selects every species. Defaults to "all".

        Returns:
            Dict[str, Any]: "data" holds a DataFrame with one column per species that has replicates plus a "time" column (empty DataFrame if no species has replicates); "initConc" maps species IDs to (init_conc, unit) tuples.
        """

        if isinstance(species_ids, str):
            species_ids = [species_ids]

        select_all = species_ids == ["all"]

        columns = {}
        initial_concentration = {}
        num_replicates = 0

        # Iterate over measurementData to fill columns
        for species_id, data in measurement_species.items():

            if not select_all and species_id not in species_ids:
                continue

            # Fetch initial concentration
            initial_concentration[species_id] = (data.init_conc, data.unit)

            # Track the largest number of replicates found for any species
            num_replicates = max(num_replicates, len(data.replicates))

            # Concatenate the data of all replicates into a single, new list so
            # that the replicates' own data is never modified
            for replicate in data.replicates:
                columns.setdefault(species_id, []).extend(replicate.data)

        # Add global time to columns according to the number of replicates
        # NOTE(review): this presumes every species column ends up as long as
        # global_time * num_replicates; columns of differing length cannot be
        # combined into one DataFrame.
        columns["time"] = self.global_time * num_replicates

        return {
            # "time" is always present, so more than one column means data exists
            "data": pd.DataFrame(columns) if len(columns) > 1 else pd.DataFrame(),
            "initConc": initial_concentration,
        }

    @validate_arguments
    def addReplicates(
        self, replicates: Union[List[Replicate], Replicate], enzmldoc, log: bool = True
    ) -> None:
        """Adds replicates to the corresponding measurementData objects.

        This method is meant to be called if the measurement metadata of a reaction/species has already been done and replicate data has to be added afterwards. If not, use addData instead to introduce the species metadata.

        Args:
            replicates (Union[List[Replicate], Replicate]): Object(s) describing time course data.
            enzmldoc: Document whose ``_convertToUnitDef`` is used to resolve the replicate's data and time units.
            log (bool, optional): Whether the addition is logged. Defaults to True.

        Raises:
            KeyError: If the species of a replicate is not part of the measurement yet.
        """

        # Check if just a single Replicate has been handed
        if isinstance(replicates, Replicate):
            replicates = [replicates]

        for replicate in replicates:

            # IDs starting with "s" are treated as reactants, all others as proteins
            species_id = replicate.species_id
            species_type = "reactants" if species_id.startswith("s") else "proteins"
            species_data = self.species_dict[species_type]

            # Only the lookup is guarded, so that a KeyError raised by the unit
            # conversion or logging below is not misreported as a missing species
            try:
                data = species_data[species_id]
            except KeyError as err:
                species_label = species_type[0:-1]
                raise KeyError(
                    f"{species_label} {species_id} is not part of the measurement yet. If a {species_label} hasnt been yet defined in a measurement object, use the addData method to define metadata first-hand. You can add the replicates in the same function call then."
                ) from err

            replicate._data_unit_id = enzmldoc._convertToUnitDef(replicate.data_unit)
            replicate._time_unit_id = enzmldoc._convertToUnitDef(replicate.time_unit)

            data.addReplicate(replicate)

            if len(self.global_time) == 0:

                # Add global time if this is the first replicate to be added
                self.global_time = replicate.time
                self.global_time_unit = replicate.time_unit
                self._global_time_unit_id = replicate._time_unit_id

            # Log Replicate creation
            if log:
                log_object(logger, replicate)
                logger.debug(
                    f"Added {type(replicate).__name__} '{replicate.id}' to data '{data.get_id()}' of measurement '{self.name}'"
                )

    @validate_arguments
    def addData(
        self,
        unit: str,
        init_conc: float = 0.0,
        reactant_id: Optional[str] = None,
        protein_id: Optional[str] = None,
        replicates: Optional[List[Replicate]] = None,
        log: bool = True,
    ) -> None:
        """Adds data to the measurement object.

        Args:
            unit (str): The unit of the initial concentration.
            init_conc (float): Corresponding initial concentration of species. Defaults to 0.0.
            reactant_id (Optional[str], optional): Identifier of the reactant that was measured. Defaults to None.
            protein_id (Optional[str], optional): Identifier of the protein that was measured. Defaults to None.
            replicates (Optional[List[Replicate]], optional): List of replicates that were measured. Defaults to None, which is treated as an empty list.
            log (bool, optional): Whether the addition is logged. Defaults to True.

        Raises:
            ValueError: If neither a reactant nor a protein ID is given.
        """

        self._appendReactantData(
            reactant_id=reactant_id,
            protein_id=protein_id,
            init_conc=init_conc,
            unit=unit,
            # A fresh list per call avoids sharing one default list between calls
            replicates=[] if replicates is None else replicates,
            log=log,
        )

    def _appendReactantData(
        self,
        reactant_id: Optional[str],
        protein_id: Optional[str],
        init_conc: float,
        unit: str,
        replicates: List[Replicate],
        log: bool = True,
    ) -> None:
        """Creates a MeasurementData object and stores it under its species ID.

        If both IDs are given, the data is stored as a reactant.

        Raises:
            ValueError: If neither a reactant nor a protein ID is given.
        """

        # Fail before any object is created if there is no ID to store it under
        if not reactant_id and not protein_id:
            raise ValueError(
                "Please enter a reactant or protein ID to add measurement data"
            )

        # Create measurement data class before sorting
        measData = MeasurementData(
            reactant_id=reactant_id,
            protein_id=protein_id,
            init_conc=init_conc,
            unit=unit,
            replicates=replicates,
            measurement_id=self.id,
        )

        if reactant_id:
            self.species_dict["reactants"][reactant_id] = measData
        else:
            self.species_dict["proteins"][protein_id] = measData

        # Log the new object
        if log:
            log_object(logger, measData)
            logger.debug(
                f"Added {type(measData).__name__} '{measData.get_id()}' to measurement '{self.name}'"
            )

    def updateReplicates(self, replicates: List[Replicate]) -> None:
        """Sets ``measurement_id`` of each given replicate to this measurement's name.

        NOTE(review): this assigns ``self.name`` whereas ``_setReplicateMeasIDs``
        assigns ``self.id`` to the same-named attribute — confirm which is intended.
        """
        for replicate in replicates:
            # Set the measurement name for the replicate
            replicate.measurement_id = self.name

    def _setReplicateMeasIDs(self) -> None:
        """Sets ``measurement_id`` of every MeasurementData object to this measurement's ID."""
        for species_group in ("proteins", "reactants"):
            for measData in self.species_dict[species_group].values():
                measData.measurement_id = self.id

    def unifyUnits(self, kind: str, scale: int, enzmldoc) -> None:
        """Rescales all replicates and measurements to match the scale of a unit kind.

        Args:
            kind (str): The unit kind from which to rescale. Currently supported: 'mole', 'gram', 'litre'.
            scale (int): Decade scale to which the values will be rescaled. Must be a multiple of 3, or +/-1.
            enzmldoc (EnzymeMLDocument): The EnzymeMLDocument to which the new unit will be added.

        Raises:
            ValueError: If ``kind`` is not supported or ``scale`` is not allowed.
        """

        if kind not in ["mole", "gram", "litre"]:
            raise ValueError(
                f"Kind {kind} is not supported. Please use 'mole', 'gram', or 'litre'"
            )

        # Multiples of 3 are accepted, and a magnitude of 1 is let through as well
        magnitude = abs(scale)
        if magnitude % 3 > 0 and magnitude != 1:
            raise ValueError(
                f"Scale {scale} is not a multiple of 3. Please make sure the scale is a multiple of 3."
            )

        for measurement_data in self._getAllSpecies().values():
            measurement_data.unifyUnits(kind=kind, scale=scale, enzmldoc=enzmldoc)

    def _has_replicates(self) -> bool:
        """Checks whether replicates are present in the measurement.

        This is only used to check whether to write time course data or not.

        Returns:
            bool: Returns True if there are any replicate otherwise False.
        """

        return any(
            len(meas_data.replicates) > 0
            for meas_data in self._getAllSpecies().values()
        )

    # ! Getters
    def temperature_unitdef(self):
        """Returns the appropriate unitdef if an enzmldoc is given, otherwise None."""

        if not self._enzmldoc:
            return None

        return self._enzmldoc._unit_dict[self._temperature_unit_id]

    @validate_arguments
    def getReactant(self, reactant_id: str) -> MeasurementData:
        """Returns a single MeasurementData object for the given reactant_id.

        Args:
            reactant_id (String): Unique identifier of the reactant

        Returns:
            MeasurementData: Object representing the data and initial concentration

        Raises:
            SpeciesNotFoundError: If the ID is not part of the measurement.
        """
        return self._getSpecies(reactant_id)

    def getProtein(self, protein_id: str) -> MeasurementData:
        """Returns a single MeasurementData object for the given protein_id.

        Args:
            protein_id (String): Unique identifier of the protein

        Returns:
            MeasurementData: Object representing the data and initial concentration

        Raises:
            SpeciesNotFoundError: If the ID is not part of the measurement.
        """
        return self._getSpecies(protein_id)

    def getReactants(self) -> Dict[str, MeasurementData]:
        """Returns a dict of all participating reactants in the measurement.

        Returns:
            dict: Dict of MeasurementData objects representing data
        """
        return self.species_dict["reactants"]

    def getProteins(self) -> Dict[str, MeasurementData]:
        """Returns a dict of all participating proteins in the measurement.

        Returns:
            dict: Dict of MeasurementData objects representing data
        """
        return self.species_dict["proteins"]

    def _getAllSpecies(self):
        """Returns a new dict merging proteins and reactants (reactants win on equal IDs)."""
        return {**self.species_dict["proteins"], **self.species_dict["reactants"]}

    @validate_arguments
    def _getSpecies(self, species_id: str) -> MeasurementData:
        """Looks up a species among both proteins and reactants.

        Raises:
            SpeciesNotFoundError: If the ID is not part of the measurement.
        """
        try:
            return self._getAllSpecies()[species_id]
        except KeyError as err:
            raise SpeciesNotFoundError(
                species_id=species_id, enzymeml_part="Measurement"
            ) from err

    @deprecated_getter("id")
    def getId(self):
        return self.id

    @deprecated_getter("global_time_unit")
    def getGlobalTimeUnit(self):
        return self.global_time_unit

    @deprecated_getter("global_time")
    def getGlobalTime(self):
        return self.global_time

    @deprecated_getter("name")
    def getName(self):
        return self.name

    @deprecated_getter("species_dict")
    def getSpeciesDict(self):
        return self.species_dict