2023-06-10 14:49:33 +01:00

123 lines
4.5 KiB
Python

from pprint import pprint
import deepdiff
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS, SEQUENCE_WS, STRAIN_WS
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.publication import Publication
def retrieve_strain_by_accession_number(client, accession_number):
query = {"Query": [{"Index": 0,
"FieldName": "Collection accession number",
"Operation": "TextExactMatch",
"Value": accession_number}],
"Expression": "Q0",
"DisplayStart": 0,
"DisplayLength": 10}
result = client.search(STRAIN_WS, query=query)
total = result["total"]
if total == 0:
return None
elif total == 1:
return result["records"][0]
else:
msg = f"More than one entries for {accession_number} in database"
raise ValueError(msg)
def get_or_create_publication(client: BiolomicsMirriClient, pub: Publication):
new_pub = client.retrieve_by_name(BIBLIOGRAPHY_WS, pub.title)
if new_pub is not None:
return {'record': new_pub, 'created': False}
new_pub = client.create(BIBLIOGRAPHY_WS, pub)
return {'record': new_pub, 'created': True}
def get_or_create_sequence(client: BiolomicsMirriClient, sequence: GenomicSequenceBiolomics):
seq = client.retrieve_by_name(SEQUENCE_WS, sequence.marker_id)
if seq is not None:
return {'record': seq, 'created': False}
new_seq = client.create(SEQUENCE_WS, sequence)
return {'record': new_seq, 'created': True}
def get_or_create_or_update_strain(client: BiolomicsMirriClient,
record: StrainMirri, update=False):
response = get_or_create_strain(client, record)
new_record = response['record']
created = response['created']
if created:
return {'record': new_record, 'created': True, 'updated': False}
if not update:
return {'record': new_record, 'created': False, 'updated': False}
if record.record_id is None:
record.record_id = new_record.record_id
if record.record_name is None:
record.record_name = new_record.record_name
if record.synonyms is None or record.synonyms == []:
record.synonyms = new_record.synonyms
# compare_strains
# we exclude pub id as it is an internal reference of pub and can be changed
diffs = deepdiff.DeepDiff(new_record.dict(), record.dict(),
ignore_order=True, exclude_paths=None,
exclude_regex_paths=[r"root\[\'publications\'\]\[\d+\]\[\'id\'\]",
r"root\[\'publications\'\]\[\d+\]\[\'RecordId\'\]",
r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordId\'\]",
r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordName\'\]"])
if diffs:
pprint(diffs, width=200)
# pprint('en el que yo mando')
# pprint(record.dict())
# pprint('lo que hay en db')
# pprint(new_record.dict())
records_are_different = True if diffs else False
if records_are_different:
updated_record = update_strain(client, record)
updated = True
else:
updated_record = record
updated = False
return {'record': updated_record, 'created': False, 'updated': updated}
def get_or_create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
new_strain = retrieve_strain_by_accession_number(client, strain.id.strain_id)
if new_strain is not None:
return {'record': new_strain, 'created': False}
new_strain = create_strain(client, strain)
return {'record': new_strain, 'created': True}
def create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
for pub in strain.publications:
creation_response = get_or_create_publication(client, pub)
for marker in strain.genetics.markers:
creation_response = get_or_create_sequence(client, marker)
new_strain = client.create(STRAIN_WS, strain)
return new_strain
def update_strain(client: BiolomicsMirriClient, strain: StrainMirri):
for pub in strain.publications:
creation_response = get_or_create_publication(client, pub)
for marker in strain.genetics.markers:
creation_response = get_or_create_sequence(client, marker)
new_strain = client.update(STRAIN_WS, strain)
return new_strain