# Source code for pyenzyme.enzymeml.databases.dataverse
# File: dataverse.py
# Project: core
# Author: Jan Range
# License: BSD-2 clause
# Copyright (c) 2022 Institute of Biochemistry and Technical Biochemistry Stuttgart
import json
import os
import pydantic
from typing import Dict, Any, Optional
from pyDaRUS import EnzymeMl, Citation, Dataset
from pyDaRUS.metadatablocks.enzymeML import Constant
from pyDaRUS.metadatablocks.citation import SubjectEnum
def uploadToDataverse(
    enzmldoc,
    dataverse_name: str,
    base_url: Optional[str] = None,
    api_token: Optional[str] = None,
) -> None:
    """Uploads a given EnzymeMLDocument object to a dataverse installation.

    The document is described by an EnzymeML and a Citation metadatablock,
    written to a temporary OMEX archive in the current working directory,
    attached to a new dataset and uploaded. The temporary archive is removed
    afterwards, regardless of whether the upload succeeded.

    It should be noted that the environment variables 'DATAVERSE_URL' and
    'DATAVERSE_API_TOKEN' should be set appropriately before the upload if
    ``base_url`` / ``api_token`` are not given. Otherwise the upload cannot
    be done.

    Args:
        enzmldoc (EnzymeMLDocument): The EnzymeMLDocument to be uploaded.
        dataverse_name (str): Name of the dataverse to be uploaded to.
        base_url (str): Base URL of the dataverse to upload. Defaults to None.
            If None the URL will be drawn from env vars.
        api_token (str): API Token of the dataverse to upload. Defaults to
            None. If None the API Token will be drawn from env vars.

    Raises:
        Exception: Any error raised while attaching the archive or uploading
            the dataset is propagated unchanged after the cleanup has run.
    """
    # Fill in all the metadatablocks
    enzml_meta = create_enzymeml_metadatablock(enzmldoc)
    citation_meta = create_citation_metadatablock(enzmldoc)

    # Initialize a dataset for the upload and add all metadatablocks
    dataset = Dataset()
    dataset.add_metadatablock(enzml_meta)
    dataset.add_metadatablock(citation_meta)

    # Write EnzymeMLDocument to file
    archive_name = f"{enzmldoc.name.replace(' ', '_')}_dv_upload"
    archive_path = f"{archive_name}.omex"
    enzmldoc.toFile(".", name=archive_name)

    try:
        dataset.add_file(dv_path=archive_path, local_path=archive_path)
        dataset.upload(
            dataverse_name=dataverse_name,
            DATAVERSE_URL=base_url,
            API_TOKEN=api_token,
        )
    finally:
        # Remove the temporary archive on success and on failure alike. The
        # existence check keeps a missing file from raising during cleanup
        # and thereby hiding the actual upload error.
        if os.path.exists(archive_path):
            os.remove(archive_path)
def create_enzymeml_metadatablock(
    enzmldoc,
):
    """Builds the EnzymeML metadatablock describing an EnzymeMLDocument.

    Vessels, proteins and reactants are serialized to JSON, their boolean
    ``constant`` flag is translated to the controlled vocabulary of the
    metadatablock and the mapped fields are added via ``add_object``.
    For every reaction the conditions, participating species names and a
    textual equation are added; if the reaction carries a kinetic model,
    the kinetic law and its parameters are added as well.

    Args:
        enzmldoc (EnzymeMLDocument): Document from which the metadata is
            extracted.

    Returns:
        EnzymeMl: The populated EnzymeML metadatablock.
    """
    # Initialize the EnzymeML metadatablock
    enzml_meta = EnzymeMl()

    # Field name in the EnzymeML object -> field name in the metadatablock
    vessel_mapping = {
        "name": "name",
        "volume": "size",
        "unit": "unit",
        "constant": "constant",
    }
    protein_mapping = {
        # "id": "identifier",
        "name": "name",
        "vessel_id": "vessel_reference",
        "init_conc": "initial_concentration",
        "unit": "unit",
        "constant": "constant",
        "sequence": "sequence",
        "organism": "organism",
        "uniprotid": "uniprotid",
        "ecnumber": "ecnumber",
        "ontology": "sbo_term",
    }
    reactant_mapping = {
        # "id": "identifier",
        "name": "name",
        "vessel_id": "vessel_reference",
        "init_conc": "initial_concentration",
        "unit": "unit",
        "constant": "constant",
        "inchi": "inchicode",
        "smiles": "smilescode",
        "ontology": "sbo_term",
    }

    # Vessels, proteins and reactants are all handled the same way, so they
    # share one loop; the order (vessels, proteins, reactants) is kept.
    species_sections = (
        (enzmldoc.vessel_dict, vessel_mapping, enzml_meta.add_vessels),
        (enzmldoc.protein_dict, protein_mapping, enzml_meta.add_proteins),
        (enzmldoc.reactant_dict, reactant_mapping, enzml_meta.add_reactants),
    )
    for entries, mapping, add_fun in species_sections:
        for entry in entries.values():
            json_data = json.loads(entry.json())

            # Apply corrections to match controlled vocabs
            json_data["constant"] = (
                Constant.constant.value
                if json_data["constant"]
                else Constant.not_constant.value
            )

            add_object(json_data, mapping, add_fun)

    reaction_mapping = {
        "name": "name",
        "temperature": "temperature_value",
        "temperature_unit": "temperature_unit",
        "ph": "ph_value",
    }
    for reaction in enzmldoc.reaction_dict.values():
        # Keep only mapped fields that are neither NaN nor falsy
        params: Dict[str, Any] = {
            reaction_mapping[key]: item
            for key, item in reaction.dict().items()
            if key in reaction_mapping and repr(item) != "nan" and item
        }

        # Apply corrections: "K" becomes "Kelvin", any other unit "Celsius"
        if params.get("temperature_unit"):
            params["temperature_unit"] = (
                "Kelvin" if params["temperature_unit"] == "K" else "Celsius"
            )

        # Extract all elements present in the reaction
        educts = [
            enzmldoc.getAny(element.species_id).name for element in reaction.educts
        ]
        products = [
            enzmldoc.getAny(element.species_id).name for element in reaction.products
        ]
        # Modifiers come from reaction.modifiers; reading reaction.products
        # here would report the products a second time as modifiers.
        modifiers = [
            enzmldoc.getAny(element.species_id).name
            for element in reaction.modifiers
        ]

        # Create corresponding string representations
        params["educts"] = ", ".join(educts)
        params["products"] = ", ".join(products)
        params["modifiers"] = ", ".join(modifiers)
        params["equation"] = " + ".join(educts) + " -> " + " + ".join(products)

        enzml_meta.add_reactions(**params)

        if reaction.model:
            # Report on the model if given
            law_params = kinetic_law_params(reaction)
            enzml_meta.add_kinetic_law(**law_params)

            for param in reaction.model.parameters:
                json_data = json.loads(param.json())
                # The reaction ID is appended to the parameter name
                enzml_meta.add_kinetic_parameters(
                    name=f"{json_data['name']}_{reaction.id}",
                    value=json_data["value"],
                    unit=json_data["unit"],
                    sbo_term=json_data.get("ontology"),
                )

    return enzml_meta
def add_object(json_data, mapping, add_fun):
    """Adds one serialized object to a metadatablock via ``add_fun``.

    Args:
        json_data: Dictionary of the object's fields.
        mapping: Dictionary translating field names of ``json_data`` to the
            keyword argument names accepted by ``add_fun``.
        add_fun: Callable that receives the translated fields as keyword
            arguments.

    Returns:
        None

    Raises:
        pydantic.ValidationError: If ``add_fun`` rejects the arguments and
            none of the reported errors concerns the ``unit`` field.
    """
    kwargs = {}
    for source_key, value in json_data.items():
        target_key = mapping.get(source_key)
        # Skip unmapped fields as well as NaN and falsy values
        if not target_key or repr(value) == "nan" or not value:
            continue
        kwargs[target_key] = value

    # TODO fix metadatablock to accept other units than MOLAR
    unit = kwargs.get("unit")
    if unit:
        kwargs["unit"] = unit.replace("mole / l", "M")

    try:
        # Add infos to metadatablock
        add_fun(**kwargs)
    except pydantic.ValidationError as e:
        # TODO find a better way to handle this error
        unit_rejected = any(error["loc"][0] == "unit" for error in e.errors())
        if not unit_rejected:
            raise e

        # The unit was not accepted: retry once without it
        kwargs.pop("unit")
        add_fun(**kwargs)
        return None
def kinetic_law_params(reaction: "EnzymeReaction") -> Dict[str, str]:  # noqa: F821
    """Collects the keyword arguments for adding a kinetic law to an EnzymeML metadatablock.

    The ``name`` and ``equation`` fields of the reaction's model are taken
    over as ``name`` and ``kinetic_model``, provided they are neither NaN
    nor falsy. The reaction ID is always added as ``reaction_reference``.

    Args:
        reaction: Reaction whose ``model`` and ``id`` are read.

    Returns:
        Dictionary of keyword arguments describing the kinetic law.
    """
    field_to_argument = {"name": "name", "equation": "kinetic_model"}

    law_arguments = {}
    for field, value in reaction.model.dict().items():
        argument = field_to_argument.get(field)
        # Skip unmapped fields as well as NaN and falsy values
        if not argument or repr(value) == "nan" or not value:
            continue
        law_arguments[argument] = value

    law_arguments["reaction_reference"] = reaction.id
    return law_arguments
def create_citation_metadatablock(enzmldoc: "EnzymeMLDocument"):  # noqa: F821
    """Builds the Citation metadatablock for an EnzymeMLDocument.

    The document name is used as title, every creator of the document is
    added as author and as contact, and a description referring to the
    document name is attached.

    Args:
        enzmldoc: Document whose ``name`` and ``creator_dict`` are read.

    Returns:
        Citation: The populated Citation metadatablock.
    """
    citation_block = Citation(
        title=enzmldoc.name,
        subject=[
            SubjectEnum.chemistry,
            SubjectEnum.medicine___health_and__life__sciences,
        ],
    )

    # Each creator is registered both as author and as contact
    for person in enzmldoc.creator_dict.values():
        full_name = f"{person.given_name} {person.family_name}"
        citation_block.add_author(name=full_name)
        citation_block.add_contact(name=full_name, email=person.mail)

    # Add description
    description = f"EnzymeML document reporting on {enzmldoc.name}"
    citation_block.add_description(text=description)

    return citation_block