Source code for pyenzyme.enzymeml.databases.dataverse

# File: dataverse.py
# Project: core
# Author: Jan Range
# License: BSD-2 clause
# Copyright (c) 2022 Institute of Biochemistry and Technical Biochemistry Stuttgart

import json
import os
import pydantic

from typing import Dict, Any, Optional
from pyDaRUS import EnzymeMl, Citation, Dataset
from pyDaRUS.metadatablocks.enzymeML import Constant
from pyDaRUS.metadatablocks.citation import SubjectEnum


def uploadToDataverse(
    enzmldoc,
    dataverse_name: str,
    base_url: Optional[str] = None,
    api_token: Optional[str] = None,
) -> None:
    """Uploads a given EnzymeMLDocument object to a Dataverse installation.

    It should be noted that the environment variables 'DATAVERSE_URL' and
    'DATAVERSE_API_TOKEN' should be set appropriately before the upload
    whenever 'base_url' or 'api_token' are not passed explicitly. If neither
    source provides them, the upload can't be done.

    The document is written to a temporary OMEX archive in the current
    working directory; the archive is removed again once the upload has
    finished or failed.

    Args:
        enzmldoc (EnzymeMLDocument): The EnzymeMLDocument to be uploaded.
        dataverse_name (str): Name of the dataverse to be uploaded to.
        base_url (str): Base URL of the dataverse to upload. Defaults to None. If None the URL will be drawn from env vars.
        api_token (str): API Token of the dataverse to upload. Defaults to None. If None the API Token will be drawn from env vars.

    Raises:
        Exception: Any error raised while adding the file or uploading the
            dataset is propagated unchanged after the archive was removed.
    """

    # Fill in all the metadatablocks
    enzml_meta = create_enzymeml_metadatablock(enzmldoc)
    citation_meta = create_citation_metadatablock(enzmldoc)

    # Initialize a dataset for the upload and add all metadatablocks
    dataset = Dataset()
    dataset.add_metadatablock(enzml_meta)
    dataset.add_metadatablock(citation_meta)

    # Write EnzymeMLDocument to file
    archive_name = f"{enzmldoc.name.replace(' ', '_')}_dv_upload"
    archive_path = f"{archive_name}.omex"
    enzmldoc.toFile(".", name=archive_name)

    try:
        dataset.add_file(dv_path=archive_path, local_path=archive_path)
        dataset.upload(
            dataverse_name=dataverse_name,
            DATAVERSE_URL=base_url,
            API_TOKEN=api_token,
        )
    finally:
        # Remove the temporary archive on every exit path (success, failure,
        # or interruption) so no stray file is left in the working directory.
        os.remove(archive_path)
def _species_to_json(species):
    """Serialize ``species`` to a dict and map ``constant`` onto the controlled vocabulary."""
    json_data = json.loads(species.json())

    # Apply corrections to match controlled vocabs
    json_data["constant"] = (
        Constant.constant.value
        if json_data["constant"]
        else Constant.not_constant.value
    )

    return json_data


def create_enzymeml_metadatablock(
    enzmldoc,
):
    """Creates the EnzymeML metadatablock for a given EnzymeMLDocument.

    Vessels, proteins, reactants and reactions of the document are copied
    into a fresh ``EnzymeMl`` metadatablock. For every reaction that carries
    a model, the kinetic law and its parameters are added as well.

    Args:
        enzmldoc (EnzymeMLDocument): Document from which the entries are read.

    Returns:
        EnzymeMl: The populated metadatablock.
    """

    # Initialize the EnzymeML metadatablock
    enzml_meta = EnzymeMl()

    # Vessels
    vessel_mapping = {
        "name": "name",
        "volume": "size",
        "unit": "unit",
        "constant": "constant",
    }

    for vessel in enzmldoc.vessel_dict.values():
        add_object(_species_to_json(vessel), vessel_mapping, enzml_meta.add_vessels)

    # Proteins
    protein_mapping = {
        # "id": "identifier",
        "name": "name",
        "vessel_id": "vessel_reference",
        "init_conc": "initial_concentration",
        "unit": "unit",
        "constant": "constant",
        "sequence": "sequence",
        "organism": "organism",
        "uniprotid": "uniprotid",
        "ecnumber": "ecnumber",
        "ontology": "sbo_term",
    }

    for protein in enzmldoc.protein_dict.values():
        add_object(
            _species_to_json(protein), protein_mapping, enzml_meta.add_proteins
        )

    # Reactants
    reactant_mapping = {
        # "id": "identifier",
        "name": "name",
        "vessel_id": "vessel_reference",
        "init_conc": "initial_concentration",
        "unit": "unit",
        "constant": "constant",
        "inchi": "inchicode",
        "smiles": "smilescode",
        "ontology": "sbo_term",
    }

    for reactant in enzmldoc.reactant_dict.values():
        add_object(
            _species_to_json(reactant), reactant_mapping, enzml_meta.add_reactants
        )

    # Reactions
    reaction_mapping = {
        "name": "name",
        "temperature": "temperature_value",
        "temperature_unit": "temperature_unit",
        "ph": "ph_value",
    }

    def species_names(elements):
        # Resolve the referenced species of each reaction element to its name
        return [enzmldoc.getAny(element.species_id).name for element in elements]

    for reaction in enzmldoc.reaction_dict.values():
        # Keep only mapped fields that carry a usable (non-empty, non-NaN) value
        params: Dict[str, Any] = {
            reaction_mapping.get(key): item
            for key, item in reaction.dict().items()
            if reaction_mapping.get(key) and repr(item) != "nan" and item
        }

        # Apply corrections: "K" becomes "Kelvin", any other unit "Celsius"
        if params.get("temperature_unit"):
            params["temperature_unit"] = (
                "Kelvin" if params["temperature_unit"] == "K" else "Celsius"
            )

        # Extract all elements present in the reaction
        educts = species_names(reaction.educts)
        products = species_names(reaction.products)
        # Modifiers must be read from the reaction's modifiers, not its products
        modifiers = species_names(reaction.modifiers)

        # Create corresponding string representations
        params["educts"] = ", ".join(educts)
        params["products"] = ", ".join(products)
        params["modifiers"] = ", ".join(modifiers)
        params["equation"] = " + ".join(educts) + " -> " + " + ".join(products)

        enzml_meta.add_reactions(**params)

        if reaction.model:
            # Report on the model if given
            law_params = kinetic_law_params(reaction)
            enzml_meta.add_kinetic_law(**law_params)

            for param in reaction.model.parameters:
                json_data = json.loads(param.json())

                # Parameter names are suffixed with the reaction ID
                enzml_meta.add_kinetic_parameters(
                    name=f"{json_data['name']}_{reaction.id}",
                    value=json_data["value"],
                    unit=json_data["unit"],
                    sbo_term=json_data.get("ontology"),
                )

    return enzml_meta
def add_object(json_data, mapping, add_fun):
    """Translate an object's fields via ``mapping`` and hand them to ``add_fun``.

    Only keys that have a truthy entry in ``mapping`` are forwarded, and only
    when their value is truthy and its ``repr`` is not ``"nan"``. A forwarded
    ``unit`` has every occurrence of ``"mole / l"`` replaced by ``"M"``.

    Args:
        json_data: Dictionary holding the fields of the object to add.
        mapping: Dictionary mapping field names to ``add_fun`` keywords.
        add_fun: Callable receiving the translated fields as keyword arguments.

    Returns:
        None

    Raises:
        pydantic.ValidationError: If ``add_fun`` rejects the arguments and no
            reported error is located at ``unit``.
    """

    # Translate and filter the incoming fields
    kwargs = {}
    for field, value in json_data.items():
        keyword = mapping.get(field)
        if not keyword or repr(value) == "nan" or not value:
            continue
        kwargs[keyword] = value

    # TODO fix metadatablock to accept other units than MOLAR
    if kwargs.get("unit"):
        kwargs["unit"] = kwargs["unit"].replace("mole / l", "M")

    try:
        # Add infos to metadatablock
        add_fun(**kwargs)
    except pydantic.ValidationError as exc:
        # TODO find a better way to handle this error
        unit_rejected = any(
            problem["loc"][0] == "unit" for problem in exc.errors()
        )

        if not unit_rejected:
            raise exc

        # Retry once without the unit that failed validation
        kwargs.pop("unit")
        add_fun(**kwargs)

    return None
def kinetic_law_params(reaction: "EnzymeReaction") -> Dict[str, str]:  # noqa: F821
    """Collects the keyword arguments needed to add a kinetic law to an EnzymeML Metadatablock.

    The model's ``name`` is passed on as ``name`` and its ``equation`` as
    ``kinetic_model``; values that are falsy or whose ``repr`` is ``"nan"``
    are left out. The reaction's ID is always added as
    ``reaction_reference``.

    Args:
        reaction (EnzymeReaction): Reaction whose model should be reported.

    Returns:
        Dict[str, str]: Keyword arguments describing the kinetic law.
    """

    # Model field -> keyword used for the kinetic law entry
    field_to_keyword = {"name": "name", "equation": "kinetic_model"}

    law_kwargs = {}
    for field, value in reaction.model.dict().items():
        keyword = field_to_keyword.get(field)
        if not keyword or repr(value) == "nan" or not value:
            continue
        law_kwargs[keyword] = value

    law_kwargs["reaction_reference"] = reaction.id

    return law_kwargs
def create_citation_metadatablock(enzmldoc: "EnzymeMLDocument"):  # noqa: F821
    """Creates the Citation metadatablock for a given EnzymeMLDocument.

    The document name is used as the title, the subjects are fixed to
    chemistry and medicine/health/life sciences, every creator is added as
    both author and contact, and a short description naming the document is
    attached.

    Args:
        enzmldoc (EnzymeMLDocument): Document from which the citation is derived.

    Returns:
        Citation: The populated metadatablock.
    """

    # Initialize the Citation metadatablock
    doc_title = enzmldoc.name
    subjects = [
        SubjectEnum.chemistry,
        SubjectEnum.medicine___health_and__life__sciences,
    ]
    citation_meta = Citation(title=doc_title, subject=subjects)

    # Each creator is listed as author and as contact person
    for person in enzmldoc.creator_dict.values():
        full_name = f"{person.given_name} {person.family_name}"
        citation_meta.add_author(name=full_name)
        citation_meta.add_contact(name=full_name, email=person.mail)

    # Add description
    summary = f"EnzymeML document reporting on {enzmldoc.name}"
    citation_meta.add_description(text=summary)

    return citation_meta