123 lines
4.5 KiB
Python
123 lines
4.5 KiB
Python
from pprint import pprint
|
|
import deepdiff
|
|
|
|
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS, SEQUENCE_WS, STRAIN_WS
|
|
|
|
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
|
|
from mirri.biolomics.serializers.strain import StrainMirri
|
|
from mirri.entities.publication import Publication
|
|
|
|
|
|
def retrieve_strain_by_accession_number(client, accession_number):
|
|
query = {"Query": [{"Index": 0,
|
|
"FieldName": "Collection accession number",
|
|
"Operation": "TextExactMatch",
|
|
"Value": accession_number}],
|
|
"Expression": "Q0",
|
|
"DisplayStart": 0,
|
|
"DisplayLength": 10}
|
|
|
|
result = client.search(STRAIN_WS, query=query)
|
|
total = result["total"]
|
|
if total == 0:
|
|
return None
|
|
elif total == 1:
|
|
return result["records"][0]
|
|
else:
|
|
msg = f"More than one entries for {accession_number} in database"
|
|
raise ValueError(msg)
|
|
|
|
|
|
def get_or_create_publication(client: BiolomicsMirriClient, pub: Publication):
|
|
new_pub = client.retrieve_by_name(BIBLIOGRAPHY_WS, pub.title)
|
|
|
|
if new_pub is not None:
|
|
return {'record': new_pub, 'created': False}
|
|
new_pub = client.create(BIBLIOGRAPHY_WS, pub)
|
|
return {'record': new_pub, 'created': True}
|
|
|
|
|
|
def get_or_create_sequence(client: BiolomicsMirriClient, sequence: GenomicSequenceBiolomics):
|
|
seq = client.retrieve_by_name(SEQUENCE_WS, sequence.marker_id)
|
|
if seq is not None:
|
|
return {'record': seq, 'created': False}
|
|
|
|
new_seq = client.create(SEQUENCE_WS, sequence)
|
|
return {'record': new_seq, 'created': True}
|
|
|
|
|
|
def get_or_create_or_update_strain(client: BiolomicsMirriClient,
|
|
record: StrainMirri, update=False):
|
|
response = get_or_create_strain(client, record)
|
|
new_record = response['record']
|
|
created = response['created']
|
|
|
|
if created:
|
|
return {'record': new_record, 'created': True, 'updated': False}
|
|
|
|
if not update:
|
|
return {'record': new_record, 'created': False, 'updated': False}
|
|
|
|
if record.record_id is None:
|
|
record.record_id = new_record.record_id
|
|
if record.record_name is None:
|
|
record.record_name = new_record.record_name
|
|
if record.synonyms is None or record.synonyms == []:
|
|
record.synonyms = new_record.synonyms
|
|
|
|
# compare_strains
|
|
# we exclude pub id as it is an internal reference of pub and can be changed
|
|
diffs = deepdiff.DeepDiff(new_record.dict(), record.dict(),
|
|
ignore_order=True, exclude_paths=None,
|
|
exclude_regex_paths=[r"root\[\'publications\'\]\[\d+\]\[\'id\'\]",
|
|
r"root\[\'publications\'\]\[\d+\]\[\'RecordId\'\]",
|
|
r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordId\'\]",
|
|
r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordName\'\]"])
|
|
|
|
if diffs:
|
|
pprint(diffs, width=200)
|
|
# pprint('en el que yo mando')
|
|
# pprint(record.dict())
|
|
# pprint('lo que hay en db')
|
|
# pprint(new_record.dict())
|
|
|
|
records_are_different = True if diffs else False
|
|
if records_are_different:
|
|
updated_record = update_strain(client, record)
|
|
updated = True
|
|
else:
|
|
updated_record = record
|
|
updated = False
|
|
return {'record': updated_record, 'created': False, 'updated': updated}
|
|
|
|
|
|
def get_or_create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
|
|
new_strain = retrieve_strain_by_accession_number(client, strain.id.strain_id)
|
|
if new_strain is not None:
|
|
return {'record': new_strain, 'created': False}
|
|
|
|
new_strain = create_strain(client, strain)
|
|
|
|
return {'record': new_strain, 'created': True}
|
|
|
|
|
|
def create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
|
|
for pub in strain.publications:
|
|
creation_response = get_or_create_publication(client, pub)
|
|
for marker in strain.genetics.markers:
|
|
creation_response = get_or_create_sequence(client, marker)
|
|
|
|
new_strain = client.create(STRAIN_WS, strain)
|
|
return new_strain
|
|
|
|
|
|
def update_strain(client: BiolomicsMirriClient, strain: StrainMirri):
|
|
for pub in strain.publications:
|
|
creation_response = get_or_create_publication(client, pub)
|
|
for marker in strain.genetics.markers:
|
|
creation_response = get_or_create_sequence(client, marker)
|
|
|
|
new_strain = client.update(STRAIN_WS, strain)
|
|
return new_strain
|
|
|