First import

commit 332876f58c
19  README.md  Normal file
@@ -0,0 +1,19 @@
# MIRRI Utils

## Installation

> pip install path_to_package.tar.gz

## Description

A small set of utilities to deal with MIRRI data.

- A data class to handle strain data.
- An Excel reader for the MIRRI specification.
- An Excel validator for the MIRRI specification.
- An Excel writer to create an Excel file that follows the MIRRI specification.
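A minimal end-to-end sketch of these utilities, assembled from the APIs this commit introduces (`validate_mirri_excel`, `parse_mirri_excel`, see `bin/validate.py` below); the input file name is a placeholder:

```python
# Validate a MIRRI Excel file and, if it is clean, parse it;
# 'strains.xlsx' is a hypothetical input file.
from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.validation.excel_validator import validate_mirri_excel

with open('strains.xlsx', 'rb') as fhand:
    error_log = validate_mirri_excel(fhand, version='20200601')
    errors = error_log.get_errors()
    if errors:
        for errors_by_type in errors.values():
            for error in errors_by_type:
                print(error.pk, error.message, error.code)
    else:
        fhand.seek(0)  # the validator consumes the handle, rewind before parsing
        parsed = parse_mirri_excel(fhand, version='20200601')
        strains = list(parsed['strains'])
        growth_media = list(parsed['growth_media'])
```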
77  bin/delete_duplicated_strain_by_number.py  Normal file
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import argparse
import sys

from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import STRAIN_WS

SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'


def get_cmd_args():
    desc = "Delete the duplicated strains for a given accession number in MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-a', '--accession_number', required=True,
                        help='Delete the duplicated items in the database for the given accession number')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')

    args = parser.parse_args()

    return {'accession_number': args.accession_number, 'user': args.ws_user,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret}


def write_errors_in_screen(errors, fhand=sys.stderr):
    for key, errors_by_type in errors.items():
        fhand.write(f'{key}\n')
        fhand.write('-' * len(key) + '\n')
        for error in errors_by_type:
            if error.pk:
                fhand.write(f'{error.pk}: ')
            fhand.write(f'{error.message} - {error.code}\n')
        fhand.write('\n')


def main():
    args = get_cmd_args()
    out_fhand = sys.stdout

    client = BiolomicsMirriClient(server_url=SERVER_URL, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'])
    query = {"Query": [{"Index": 0,
                        "FieldName": "Collection accession number",
                        "Operation": "TextExactMatch",
                        "Value": args['accession_number']}],
             "Expression": "Q0",
             "DisplayStart": 0,
             "DisplayLength": 10}

    result = client.search(STRAIN_WS, query=query)
    total = result["total"]
    if total == 0:
        out_fhand.write('Accession not in database\n')
        sys.exit(0)
    elif total == 1:
        out_fhand.write('Accession is not duplicated\n')
        sys.exit(0)

    print(f'Duplicates found: {total}. Removing duplicates')
    # keep the last record and delete the rest
    duplicated_ids = [record.record_id for record in result['records']]
    for duplicated_id in duplicated_ids[:-1]:
        client.delete_by_id(STRAIN_WS, duplicated_id)


if __name__ == '__main__':
    main()
91  bin/delete_mirri_data.py  Normal file
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
import argparse
import sys

from mirri.biolomics.pipelines.strain import retrieve_strain_by_accession_number
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS, STRAIN_WS
from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.validation.excel_validator import validate_mirri_excel

SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'


def get_cmd_args():
    desc = "Delete the strains and growth media listed in a MIRRI Excel file from MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', help='Validated Excel file',
                        type=argparse.FileType('rb'), required=True)
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given Excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('-f', '--force_update', required=False,
                        action='store_true',
                        help='Use it if you want to update the existing strains')

    args = parser.parse_args()

    return {'input_fhand': args.input, 'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret, 'update': args.force_update}


def write_errors_in_screen(errors, fhand=sys.stderr):
    for key, errors_by_type in errors.items():
        fhand.write(f'{key}\n')
        fhand.write('-' * len(key) + '\n')
        for error in errors_by_type:
            if error.pk:
                fhand.write(f'{error.pk}: ')
            fhand.write(f'{error.message} - {error.code}\n')
        fhand.write('\n')


def main():
    args = get_cmd_args()
    input_fhand = args['input_fhand']
    spec_version = args['version']
    out_fhand = sys.stderr
    error_log = validate_mirri_excel(input_fhand, version=spec_version)
    errors = error_log.get_errors()
    if errors:
        write_errors_in_screen(errors, out_fhand)
        sys.exit(1)

    input_fhand.seek(0)
    parsed_objects = parse_mirri_excel(input_fhand, version=spec_version)
    strains = list(parsed_objects['strains'])
    growth_media = list(parsed_objects['growth_media'])

    client = BiolomicsMirriClient(server_url=SERVER_URL, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'])
    for gm in growth_media:
        try:
            client.delete_by_name(GROWTH_MEDIUM_WS, gm.acronym)
        except ValueError as error:
            print(error)
            continue
        print(f'Growth medium {gm.acronym} deleted')

    for strain in strains:
        ws_strain = retrieve_strain_by_accession_number(client, strain.id.strain_id)
        if ws_strain is not None:
            client.delete_by_id(STRAIN_WS, ws_strain.record_id)
            print(f'Strain {strain.id.strain_id} deleted')
        else:
            print(f'Strain {strain.id.strain_id} not in database')


if __name__ == '__main__':
    main()
182  bin/upload_strains_to_mirri_is.py  Normal file
@@ -0,0 +1,182 @@
#!/usr/bin/env python3
import argparse
import sys
from collections import Counter

from mirri.biolomics.pipelines.growth_medium import get_or_create_or_update_growth_medium
from mirri.biolomics.pipelines.strain import get_or_create_or_update_strain
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.validation.excel_validator import validate_mirri_excel

TEST_SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
PROD_SERVER_URL = 'https://webservices.bio-aware.com/mirri'


def get_cmd_args():
    desc = "Upload strains to MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', help='Validated Excel file',
                        type=argparse.FileType('rb'), required=True)
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given Excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('--force_update', required=False,
                        action='store_true',
                        help='Use it if you want to update the existing strains')
    parser.add_argument('--verbose', action='store_true',
                        help='Use it if you want a verbose output')
    parser.add_argument('--prod', action='store_true',
                        help='Use production server')
    parser.add_argument('--dont_add_gm', action='store_false',
                        help="Don't add growth media", default=True)
    parser.add_argument('--dont_add_strains', action='store_false',
                        help="Don't add strains", default=True)
    parser.add_argument('--skip_first_num', type=int,
                        help='Skip the first X strains given to the tool')

    args = parser.parse_args()

    return {'input_fhand': args.input, 'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret, 'update': args.force_update,
            'verbose': args.verbose, 'use_production_server': args.prod,
            'add_gm': args.dont_add_gm, 'add_strains': args.dont_add_strains,
            'skip_first_num': args.skip_first_num}


def write_errors_in_screen(errors, fhand=sys.stderr):
    for key, errors_by_type in errors.items():
        fhand.write(f'{key}\n')
        fhand.write('-' * len(key) + '\n')
        for error in errors_by_type:
            if error.pk:
                fhand.write(f'{error.pk}: ')
            fhand.write(f'{error.message} - {error.code}\n')
        fhand.write('\n')


def create_or_upload_strains(client, strains, update=False, counter=None,
                             out_fhand=None, seek=None):
    for index, strain in enumerate(strains):
        if seek is not None and index < seek:
            continue
        # if strain.id.strain_id != 'CECT 5766':
        #     continue
        result = get_or_create_or_update_strain(client, strain, update=update)

        new_strain = result['record']
        created = result['created']
        updated = result.get('updated', False)
        if updated:
            result_state = 'updated'
        elif created:
            result_state = 'created'
        else:
            result_state = 'not modified'
        if counter is not None:
            counter[result_state] += 1
        if out_fhand is not None:
            out_fhand.write(f'{index}: Strain {new_strain.id.strain_id}: {result_state}\n')
        # break


def create_or_upload_growth_media(client, growth_media, update=False, counter=None,
                                  out_fhand=None):
    for gm in growth_media:
        result = get_or_create_or_update_growth_medium(client, gm, update)

        new_gm = result['record']
        created = result['created']
        updated = result.get('updated', False)
        if updated:
            result_state = 'updated'
        elif created:
            result_state = 'created'
        else:
            result_state = 'not modified'
        if counter is not None:
            counter[result_state] += 1
        if out_fhand is not None:
            out_fhand.write(f'Growth medium {new_gm.record_name}: {result_state}\n')


def main():
    args = get_cmd_args()
    input_fhand = args['input_fhand']
    spec_version = args['version']
    out_fhand = sys.stdout
    error_log = validate_mirri_excel(input_fhand, version=spec_version)
    errors = error_log.get_errors()
    skip_first_num = args['skip_first_num']
    if errors:
        write_errors_in_screen(errors, out_fhand)
        sys.exit(1)

    input_fhand.seek(0)
    parsed_objects = parse_mirri_excel(input_fhand, version=spec_version)
    strains = list(parsed_objects['strains'])
    growth_media = list(parsed_objects['growth_media'])

    server_url = PROD_SERVER_URL if args['use_production_server'] else TEST_SERVER_URL

    client = BiolomicsMirriClient(server_url=server_url, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'],
                                  verbose=args['verbose'])

    if args['add_gm']:
        client.start_transaction()
        counter = Counter()
        try:
            create_or_upload_growth_media(client, growth_media, update=args['update'],
                                          counter=counter, out_fhand=out_fhand)
        except (Exception, KeyboardInterrupt) as error:
            out_fhand.write('There were some errors in the Growth media upload\n')
            out_fhand.write(str(error) + '\n')
            out_fhand.write('Rolling back\n')
            client.rollback()
            raise
        client.finish_transaction()
        show_stats(counter, 'Growth Media', out_fhand)

    if args['add_strains']:
        client.start_transaction()
        counter = Counter()
        try:
            create_or_upload_strains(client, strains, update=args['update'],
                                     counter=counter,
                                     out_fhand=out_fhand, seek=skip_first_num)
        except (Exception, KeyboardInterrupt) as error:
            out_fhand.write('There were some errors in the Strain upload\n')
            out_fhand.write(str(error) + '\n')
            out_fhand.write('Rolling back\n')
            client.rollback()
            raise
        client.finish_transaction()
        show_stats(counter, 'Strains', out_fhand)


def show_stats(counter, kind, out_fhand):
    out_fhand.write(f'{kind}\n')
    line = '-' * len(kind)
    out_fhand.write(f"{line}\n")
    for kind2, value in counter.most_common(5):
        out_fhand.write(f'{kind2}: {value}\n')
    out_fhand.write('\n')


if __name__ == '__main__':
    main()
19  bin/validate.py  Normal file
@@ -0,0 +1,19 @@
#!/usr/bin/env python
import sys
import warnings
from pathlib import Path

from mirri.validation.excel_validator import validate_mirri_excel

warnings.simplefilter("ignore")


def main():
    path = Path(sys.argv[1])
    error_log = validate_mirri_excel(path.open("rb"))

    for errors in error_log.get_errors().values():
        for error in errors:
            print(error.pk, error.message, error.code)


if __name__ == "__main__":
    main()
BIN  docs/Error_Log_Style_Sheet.docx  Normal file
Binary file not shown.
BIN  docs/ICT-TaskForce_HowToCompileTheSheets_v20200601.pdf  Normal file
Binary file not shown.
BIN  docs/ICT-TaskForce_RecommendationsToCollections_v20200601.pdf  Normal file
Binary file not shown.
61  mirri/TODO.txt  Normal file
@@ -0,0 +1,61 @@
Ontobiotope term: just one field in the dataset, two fields in Biolomics
Altitude: a field on its own and inside Coordinates

Geographic origin: a field and an entry in another table

Ploidy: how is this field formatted? haploid/diploid or 1, 2, 3...

Best strategy:

My class has
- strain data
- geographic data
- literature
- sequences


"No" not a valid value for Strain from a registered collection, Allowed values: ?. no. yes
"yes" not a valid value for GMO, Allowed values: ?. No. Yes

Organism Type:
first letter uppercase in deposit
lower case in retrieve

Taxon name is a list in retrieve


null values:
'Comment on taxonomy' = '' could be null
'Coordinates of geographic origin': {Longitude, lati... 'NaN' could be null
'Date of inclusion in the catalogue' = '' could be null
'Enzyme production' = '' could be null
'Ploidy': '?' could be null

Deposit date

--------------------------------------------

- Assign seq to strain in strain serializers
- Fields in the ws that are not in our specification. What to do with them?
- Type description - IGNORE
- Associated documents - IGNORE
- Data provided by - IGNORE
- Orders - IGNORE
- MTA text - IGNORE
- Catalog URL -

- Publication RecordName assignation. How to do it?
- Sequence RecordName assignation. How to do it?
- Publications serializer improvement
------------------------------------------------------

Marker Name: which options are allowed in the WS and how do they map to the types in the specifications?

update: it should be done in the detail url.

interspecific_hybrid is set to "no" by default in the web service if no value is given.
Tested temperature growth range comes back as {'max': 0.0, 'min': 0.0} when added empty

Very slow: a normal search action takes
21  mirri/__init__.py  Normal file
@@ -0,0 +1,21 @@
import functools


def rgetattr(obj, attr, *args):

    def _getattr(obj, attr):
        return getattr(obj, attr, *args)

    return functools.reduce(_getattr, [obj] + attr.split('.'))


def rsetattr(obj, attr, val):
    pre, _, post = attr.rpartition('.')
    return setattr(rgetattr(obj, pre) if pre else obj, post, val)

# using wonder's beautiful simplification:
# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427


class ValidationError(Exception):
    pass
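`rgetattr` and `rsetattr` read and write dotted attribute paths on nested objects; the serializers below rely on them. A quick illustration (the `SimpleNamespace` objects stand in for the package's nested entities):

```python
# rgetattr/rsetattr from mirri/__init__.py above, on a hypothetical nested object.
from types import SimpleNamespace

from mirri import rgetattr, rsetattr

strain = SimpleNamespace(id=SimpleNamespace(strain_id='CECT 5766'))
print(rgetattr(strain, 'id.strain_id'))    # -> CECT 5766
rsetattr(strain, 'id.strain_id', 'CECT 1')
print(strain.id.strain_id)                 # -> CECT 1
```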
0  mirri/biolomics/__init__.py  Normal file
0  mirri/biolomics/pipelines/__init__.py  Normal file
44  mirri/biolomics/pipelines/growth_medium.py  Normal file
@@ -0,0 +1,44 @@
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS
from mirri.entities.growth_medium import GrowthMedium
from mirri.biolomics.serializers.growth_media import get_growth_medium_record_name


def get_or_create_or_update_growth_medium(client: BiolomicsMirriClient,
                                          growth_medium: GrowthMedium,
                                          update=False):
    response = get_or_create_growth_medium(client, growth_medium)

    new_gm = response['record']
    created = response['created']
    if created:
        return {'record': new_gm, 'created': created, 'updated': False}

    if not update:
        return {'record': new_gm, 'created': False, 'updated': False}

    # compare growth media
    if growth_medium.is_equal(new_gm, exclude_fields=['record_id', 'record_name', 'acronym']):
        records_are_different = False
    else:
        growth_medium.update(new_gm, include_fields=['record_id', 'record_name'])
        records_are_different = True

    if records_are_different:
        updated_gm = client.update(GROWTH_MEDIUM_WS, growth_medium)
        updated = True
    else:
        updated_gm = new_gm
        updated = False
    return {'record': updated_gm, 'created': False, 'updated': updated}


def get_or_create_growth_medium(client: BiolomicsMirriClient,
                                growth_medium: GrowthMedium):
    record_name = get_growth_medium_record_name(growth_medium)
    gm = client.retrieve_by_name(GROWTH_MEDIUM_WS, record_name)
    if gm is not None:
        return {'record': gm, 'created': False}

    new_gm = client.create(GROWTH_MEDIUM_WS, growth_medium)
    return {'record': new_gm, 'created': True}
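A sketch of how a caller consumes the `{'record', 'created', 'updated'}` dict returned above, mirroring `bin/upload_strains_to_mirri_is.py`; the `client` and `growth_medium` objects are assumed to exist already:

```python
# Consuming the result dict of get_or_create_or_update_growth_medium;
# `client` and `growth_medium` are assumed to be built elsewhere.
result = get_or_create_or_update_growth_medium(client, growth_medium, update=True)
if result['created']:
    state = 'created'
elif result['updated']:
    state = 'updated'
else:
    state = 'not modified'
print(f"Growth medium {result['record'].record_name}: {state}")
```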
122  mirri/biolomics/pipelines/strain.py  Normal file
@@ -0,0 +1,122 @@
from pprint import pprint

import deepdiff

from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS, SEQUENCE_WS, STRAIN_WS
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.publication import Publication


def retrieve_strain_by_accession_number(client, accession_number):
    query = {"Query": [{"Index": 0,
                        "FieldName": "Collection accession number",
                        "Operation": "TextExactMatch",
                        "Value": accession_number}],
             "Expression": "Q0",
             "DisplayStart": 0,
             "DisplayLength": 10}

    result = client.search(STRAIN_WS, query=query)
    total = result["total"]
    if total == 0:
        return None
    elif total == 1:
        return result["records"][0]
    else:
        msg = f"More than one entry for {accession_number} in the database"
        raise ValueError(msg)


def get_or_create_publication(client: BiolomicsMirriClient, pub: Publication):
    new_pub = client.retrieve_by_name(BIBLIOGRAPHY_WS, pub.title)

    if new_pub is not None:
        return {'record': new_pub, 'created': False}
    new_pub = client.create(BIBLIOGRAPHY_WS, pub)
    return {'record': new_pub, 'created': True}


def get_or_create_sequence(client: BiolomicsMirriClient, sequence: GenomicSequenceBiolomics):
    seq = client.retrieve_by_name(SEQUENCE_WS, sequence.marker_id)
    if seq is not None:
        return {'record': seq, 'created': False}

    new_seq = client.create(SEQUENCE_WS, sequence)
    return {'record': new_seq, 'created': True}


def get_or_create_or_update_strain(client: BiolomicsMirriClient,
                                   record: StrainMirri, update=False):
    response = get_or_create_strain(client, record)
    new_record = response['record']
    created = response['created']

    if created:
        return {'record': new_record, 'created': True, 'updated': False}

    if not update:
        return {'record': new_record, 'created': False, 'updated': False}

    if record.record_id is None:
        record.record_id = new_record.record_id
    if record.record_name is None:
        record.record_name = new_record.record_name
    if record.synonyms is None or record.synonyms == []:
        record.synonyms = new_record.synonyms

    # compare strains
    # we exclude the publication id as it is an internal reference of the publication and can change
    diffs = deepdiff.DeepDiff(new_record.dict(), record.dict(),
                              ignore_order=True, exclude_paths=None,
                              exclude_regex_paths=[r"root\[\'publications\'\]\[\d+\]\[\'id\'\]",
                                                   r"root\[\'publications\'\]\[\d+\]\[\'RecordId\'\]",
                                                   r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordId\'\]",
                                                   r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordName\'\]"])

    if diffs:
        pprint(diffs, width=200)
        # pprint('what I am sending')
        # pprint(record.dict())
        # pprint('what is in the db')
        # pprint(new_record.dict())

    records_are_different = bool(diffs)
    if records_are_different:
        updated_record = update_strain(client, record)
        updated = True
    else:
        updated_record = record
        updated = False
    return {'record': updated_record, 'created': False, 'updated': updated}


def get_or_create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    new_strain = retrieve_strain_by_accession_number(client, strain.id.strain_id)
    if new_strain is not None:
        return {'record': new_strain, 'created': False}

    new_strain = create_strain(client, strain)

    return {'record': new_strain, 'created': True}


def create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    for pub in strain.publications:
        get_or_create_publication(client, pub)
    for marker in strain.genetics.markers:
        get_or_create_sequence(client, marker)

    new_strain = client.create(STRAIN_WS, strain)
    return new_strain


def update_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    for pub in strain.publications:
        get_or_create_publication(client, pub)
    for marker in strain.genetics.markers:
        get_or_create_sequence(client, marker)

    new_strain = client.update(STRAIN_WS, strain)
    return new_strain
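The deepdiff comparison above is what decides whether an update is sent: volatile internal ids are excluded so that two records differing only in those ids count as equal. A self-contained sketch of the same pattern with toy data:

```python
# The exclude_regex_paths pattern used above, on made-up dicts.
import deepdiff

in_db = {'publications': [{'id': 1, 'title': 'Some paper'}], 'ploidy': 2}
incoming = {'publications': [{'id': 99, 'title': 'Some paper'}], 'ploidy': 2}

# the internal publication id is volatile, so it is excluded from the comparison
diffs = deepdiff.DeepDiff(in_db, incoming, ignore_order=True,
                          exclude_regex_paths=[r"root\['publications'\]\[\d+\]\['id'\]"])
print(bool(diffs))  # -> False: the records count as equal, no update is sent
```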
0  mirri/biolomics/remote/__init__.py  Normal file
210  mirri/biolomics/remote/biolomics_client.py  Normal file
@@ -0,0 +1,210 @@
from mirri.biolomics.remote.endoint_names import (SEQUENCE_WS, STRAIN_WS,
                                                  GROWTH_MEDIUM_WS, TAXONOMY_WS,
                                                  COUNTRY_WS, ONTOBIOTOPE_WS,
                                                  BIBLIOGRAPHY_WS)
from mirri.biolomics.remote.rest_client import BiolomicsClient
from mirri.biolomics.serializers.sequence import (
    serialize_to_biolomics as sequence_to_biolomics,
    serialize_from_biolomics as sequence_from_biolomics)
from mirri.biolomics.serializers.strain import (
    serialize_to_biolomics as strain_to_biolomics,
    serialize_from_biolomics as strain_from_biolomics)
from mirri.biolomics.serializers.growth_media import (
    serialize_to_biolomics as growth_medium_to_biolomics,
    serialize_from_biolomics as growth_medium_from_biolomics)
from mirri.biolomics.serializers.taxonomy import (
    serialize_from_biolomics as taxonomy_from_biolomics)
from mirri.biolomics.serializers.locality import (
    serialize_from_biolomics as country_from_biolomics)
from mirri.biolomics.serializers.ontobiotope import (
    serialize_from_biolomics as ontobiotope_from_biolomics)
from mirri.biolomics.serializers.bibliography import (
    serializer_from_biolomics as bibliography_from_biolomics,
    serializer_to_biolomics as bibliography_to_biolomics
)


class BiolomicsMirriClient:
    _conf = {
        SEQUENCE_WS: {
            'serializers': {'to': sequence_to_biolomics,
                            'from': sequence_from_biolomics},
            'endpoint': 'WS Sequences'},
        STRAIN_WS: {
            'serializers': {'to': strain_to_biolomics,
                            'from': strain_from_biolomics},
            'endpoint': 'WS Strains'},
        GROWTH_MEDIUM_WS: {
            'serializers': {'from': growth_medium_from_biolomics,
                            'to': growth_medium_to_biolomics},
            'endpoint': 'WS Growth media'},
        TAXONOMY_WS: {
            'serializers': {'from': taxonomy_from_biolomics},
            'endpoint': 'WS Taxonomy'},
        COUNTRY_WS: {
            'serializers': {'from': country_from_biolomics},
            'endpoint': 'WS Locality'},
        ONTOBIOTOPE_WS: {
            'serializers': {'from': ontobiotope_from_biolomics},
            'endpoint': 'WS Ontobiotope'},
        BIBLIOGRAPHY_WS: {
            'serializers': {'from': bibliography_from_biolomics,
                            'to': bibliography_to_biolomics},
            'endpoint': 'WS Bibliography'
        }
    }

    def __init__(self, server_url, api_version, client_id, client_secret, username,
                 password, website_id=1, verbose=False):
        _client = BiolomicsClient(server_url, api_version, client_id,
                                  client_secret, username, password,
                                  website_id=website_id, verbose=verbose)

        self.client = _client
        self.schemas = self.client.get_schemas()
        self.allowed_fields = self.client.allowed_fields
        self._transaction_created_ids = None
        self._in_transaction = False
        self._verbose = verbose

    def _initialize_transaction_storage(self):
        if self._in_transaction:
            msg = 'Can not initialize a transaction if already in a transaction'
            raise RuntimeError(msg)
        self._transaction_created_ids = []

    def _add_created_to_transaction_storage(self, response, entity_name):
        if not self._in_transaction:
            msg = 'Can not add ids to the transaction storage if not in a transaction'
            raise RuntimeError(msg)

        id_ = response.json().get('RecordId', None)
        if id_ is not None:
            ws_endpoint_name = self._conf[entity_name]['endpoint']
            self._transaction_created_ids.insert(0, (ws_endpoint_name, id_))

    def start_transaction(self):
        self._initialize_transaction_storage()
        self._in_transaction = True

    def finish_transaction(self):
        self._in_transaction = False
        self._transaction_created_ids = None

    def get_endpoint(self, entity_name):
        return self._conf[entity_name]['endpoint']

    def get_serializers_to(self, entity_name):
        return self._conf[entity_name]['serializers']['to']

    def get_serializers_from(self, entity_name):
        return self._conf[entity_name]['serializers']['from']

    def retrieve_by_name(self, entity_name, name):
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.find_by_name(endpoint, name=name)
        if response.status_code == 404:
            return None
        elif response.status_code != 200:
            raise ValueError(f"{response.status_code}: {response.text}")

        ws_entity = response.json()

        return None if ws_entity is None else serializer_from(ws_entity,
                                                              client=self)

    def retrieve_by_id(self, entity_name, _id):
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.retrieve(endpoint, record_id=_id)
        if response.status_code == 404:
            return None
        elif response.status_code != 200:
            raise ValueError(f"{response.status_code}: {response.text}")

        ws_entity = response.json()

        return serializer_from(ws_entity, client=self)

    def create(self, entity_name, entity):
        endpoint = self.get_endpoint(entity_name)
        serializer_to = self.get_serializers_to(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        data = serializer_to(entity, client=self)
        response = self.client.create(endpoint, data=data)
        if response.status_code == 200:
            if self._in_transaction:
                self._add_created_to_transaction_storage(response, entity_name)
            return serializer_from(response.json(), client=self)
        else:
            msg = f"return_code: {response.status_code}. msg: {response.json()['errors']['Value']}"
            raise RuntimeError(msg)

    def delete_by_id(self, entity_name, record_id):
        endpoint = self.get_endpoint(entity_name)
        response = self.client.delete(endpoint, record_id=record_id)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)

    def delete_by_name(self, entity_name, record_name):
        endpoint = self.get_endpoint(entity_name)
        response = self.client.find_by_name(endpoint, record_name)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)
        try:
            record_id = response.json()['RecordId']
        except TypeError:
            raise ValueError(f'The given record_name {record_name} does not exist')
        self.delete_by_id(entity_name, record_id=record_id)

    def search(self, entity_name, query):
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.search(endpoint, search_query=query)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)
        search_result = response.json()
        result = {'total': search_result['TotalCount'],
                  'records': [serializer_from(record, client=self)
                              for record in search_result['Records']]}
        return result

    def update(self, entity_name, entity):
        record_id = entity.record_id
        if record_id is None:
            msg = 'In order to update the record, the entity needs its record_id'
            raise ValueError(msg)
        endpoint = self.get_endpoint(entity_name)
        serializer_to = self.get_serializers_to(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        data = serializer_to(entity, client=self, update=True)
        response = self.client.update(endpoint, record_id=record_id, data=data)
        if response.status_code == 200:
            entity = serializer_from(response.json(), client=self)
            return entity
        else:
            msg = f"return_code: {response.status_code}. msg: {response.text}"
            raise RuntimeError(msg)

    def rollback(self):
        self._in_transaction = False
        self.client.rollback(self._transaction_created_ids)
        self._transaction_created_ids = None
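The transaction support is a best-effort compensation scheme: the ids of records created while a transaction is open are remembered, and `rollback` deletes them in reverse creation order. A usage sketch (credentials and the growth medium object are placeholders):

```python
# The start_transaction / rollback / finish_transaction pattern of the
# client above; credentials and `some_growth_medium` are placeholders.
client = BiolomicsMirriClient(server_url='https://webservices.bio-aware.com/mirri_test',
                              api_version='v2', client_id='...', client_secret='...',
                              username='...', password='...')
client.start_transaction()
try:
    client.create(GROWTH_MEDIUM_WS, some_growth_medium)
except Exception:
    client.rollback()  # deletes every record created since start_transaction
    raise
client.finish_transaction()
```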
7  mirri/biolomics/remote/endoint_names.py  Normal file
@@ -0,0 +1,7 @@
SEQUENCE_WS = 'sequence'
STRAIN_WS = 'strain'
GROWTH_MEDIUM_WS = 'growth_medium'
TAXONOMY_WS = 'taxonomy'
COUNTRY_WS = 'country'
ONTOBIOTOPE_WS = 'ontobiotope'
BIBLIOGRAPHY_WS = 'bibliography'
214  mirri/biolomics/remote/rest_client.py  Normal file
@@ -0,0 +1,214 @@
import time
import re
import sys

import requests
from requests_oauthlib import OAuth2Session
from oauthlib.oauth2 import LegacyApplicationClient
from oauthlib.oauth2.rfc6749.errors import InvalidGrantError

from mirri.entities.strain import ValidationError


class BiolomicsClient:
    schemas = None
    allowed_fields = None

    def __init__(self, server_url, api_version, client_id, client_secret,
                 username, password, website_id=1, verbose=False):
        self._client_id = client_id
        self._client_secret = client_secret
        self._username = username
        self._password = password
        self._client = None
        self.server_url = server_url
        self._api_version = api_version
        self._auth_url = self.server_url + "/connect/token"
        self.access_token = None
        self.website_id = website_id
        self._verbose = verbose
        self._schema = self.get_schemas()

    def get_access_token(self):
        if self._client is None:
            self._client = LegacyApplicationClient(client_id=self._client_id)
            authenticated = False
        else:
            expires_at = self._client.token["expires_at"]
            authenticated = expires_at > time.time()
        if not authenticated:
            oauth = OAuth2Session(client=self._client)
            try:
                token = oauth.fetch_token(
                    token_url=self._auth_url,
                    username=self._username,
                    password=self._password,
                    client_id=self._client_id,
                    client_secret=self._client_secret,
                )
            except InvalidGrantError:
                oauth.close()
                raise
            self.access_token = token["access_token"]
            oauth.close()
        return self.access_token

    def _build_headers(self):
        self.get_access_token()
        return {
            "accept": "application/json",
            "websiteId": str(self.website_id),
            "Authorization": f"Bearer {self.access_token}",
        }

    def get_detail_url(self, end_point, record_id, api_version=None):
        # api_version = self._api_version if api_version is None else api_version
        if api_version:
            return "/".join([self.server_url, api_version, 'data',
                             end_point, str(record_id)])
        else:
            return "/".join([self.server_url, 'data', end_point, str(record_id)])

    def get_list_url(self, end_point):
        return "/".join([self.server_url, 'data', end_point])
        # return "/".join([self.server_url, self._api_version, 'data', end_point])

    def get_search_url(self, end_point):
        return "/".join([self.server_url, self._api_version, 'search', end_point])

    def get_find_by_name_url(self, end_point):
        return "/".join([self.get_search_url(end_point), 'findByName'])

    def search(self, end_point, search_query):
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_search_url(end_point)
        time0 = time.time()
        response = requests.post(url, json=search_query, headers=header)
        time1 = time.time()
        if self._verbose:
            sys.stdout.write(f'Search to {end_point} request time for {url}: {time1 - time0}\n')
        return response

    def retrieve(self, end_point, record_id):
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id, api_version=self._api_version)
        time0 = time.time()
        response = requests.get(url, headers=header)
        time1 = time.time()
        if self._verbose:
            sys.stdout.write(f'Get to {end_point} request time for {url}: {time1 - time0}\n')
        return response

    def create(self, end_point, data):
        self._check_end_point_exists(end_point)
        self._check_data_consistency(data, self.allowed_fields[end_point])
        header = self._build_headers()
        url = self.get_list_url(end_point)
        return requests.post(url, json=data, headers=header)

    def update(self, end_point, record_id, data):
        self._check_end_point_exists(end_point)
        self._check_data_consistency(data, self.allowed_fields[end_point],
                                     update=True)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id=record_id)
        return requests.put(url, json=data, headers=header)

    def delete(self, end_point, record_id):
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id)
        return requests.delete(url, headers=header)

    def find_by_name(self, end_point, name):
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_find_by_name_url(end_point)
        response = requests.get(url, headers=header, params={'name': name})
        return response

    def get_schemas(self):
        if self.schemas is None:
            headers = self._build_headers()
            url = self.server_url + '/schemas'
            response = requests.get(url, headers=headers)
            if response.status_code == 200:
                self.schemas = response.json()
            else:
                raise ValueError(f"{response.status_code}: {response.text}")
        if self.allowed_fields is None:
            self.allowed_fields = self._process_schema(self.schemas)
        return self.schemas

    @staticmethod
    def _process_schema(schemas):
        schema = schemas[0]
        allowed_fields = {}
        for endpoint_schema in schema['TableViews']:
            endpoint_name = endpoint_schema['TableViewName']
            endpoint_values = endpoint_schema['ResultFields']
            fields = {field['title']: field for field in endpoint_values}
            allowed_fields[endpoint_name] = fields
        return allowed_fields

    def _check_end_point_exists(self, endpoint):
        if endpoint not in self.allowed_fields.keys():
            raise ValueError(f'{endpoint} not a recognised endpoint')

    def _check_data_consistency(self, data, allowed_fields, update=False):
        update_mandatory = set(['RecordDetails', 'RecordName', 'RecordId'])
        if update and not update_mandatory.issubset(data.keys()):
            msg = 'Updating data keys must be RecordDetails, RecordName and RecordId'
            raise ValidationError(msg)

        if not update and set(data.keys()).difference(['RecordDetails', 'RecordName', 'Acronym']):
            msg = 'data keys must be RecordDetails and RecordName or Acronym'
            raise ValidationError(msg)
        for field_name, field_value in data['RecordDetails'].items():
            if field_name not in allowed_fields:
                raise ValidationError(f'{field_name} not in allowed fields')

            field_schema = allowed_fields[field_name]
            self._check_field_schema(field_name, field_schema, field_value)

    @staticmethod
    def _check_field_schema(field_name, field_schema, field_value):
        if field_schema['FieldType'] != field_value['FieldType']:
            msg = f"Bad FieldType ({field_value['FieldType']}) for {field_name}. "
            msg += f"It should be {field_schema['FieldType']}"
            raise ValidationError(msg)

        states = field_schema.get('states', None)
        if states:
            states = [re.sub(r" *\(.*\)", "", s) for s in states]

        subfields = field_schema.get('subfields', None)
        if subfields is not None and states is not None:
            subfield_names = [subfield['SubFieldName']
                              for subfield in subfields if subfield['IsUsed']]

            for val in field_value['Value']:
                if val['Name'] not in subfield_names:
                    msg = f"{field_name}: {val['Name']} not in {subfield_names}"
                    raise ValidationError(msg)

                if val['Value'] not in states:
                    msg = f"{field_value['Value']} not a valid value for "
                    msg += f"{field_name}, Allowed values: {'. '.join(states)}"
                    raise ValidationError(msg)

        elif states is not None:
            if field_value['Value'] not in states:
                msg = f"{field_value['Value']} not a valid value for "
                msg += f"{field_name}, Allowed values: {'. '.join(states)}"
                raise ValidationError(msg)

    def rollback(self, created_ids):
        for endpoint, id_ in created_ids:
            try:
                self.delete(end_point=endpoint, record_id=id_)
            except Exception:
                pass
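The consistency checks above encode the expected payload shape: a creation payload carries only `RecordName` (or `Acronym`) plus `RecordDetails`, and every detail field must exist in the endpoint schema with a matching `FieldType`. A minimal sketch of a payload that would pass them (the field name, value and type are made up and must match the real Biolomics schema):

```python
# Minimal shape of a creation payload accepted by _check_data_consistency;
# 'Ploidy' and its FieldType are hypothetical and schema-dependent.
data = {
    'RecordName': 'CECT 1',
    'RecordDetails': {
        'Ploidy': {'Value': 'haploid', 'FieldType': 'E'},
    },
}
```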
3  mirri/biolomics/serializers/__init__.py  Normal file
@@ -0,0 +1,3 @@
RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'
RECORD_DETAILS = 'RecordDetails'
82  mirri/biolomics/serializers/bibliography.py  Normal file
@@ -0,0 +1,82 @@
from typing import List

from mirri import rgetattr
from mirri.entities.publication import Publication
from mirri.biolomics.settings import PUB_MIRRI_FIELDS

RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'

PUB_MAPPING = {
    # 'record_id': 'RecordId',
    # 'record_name': 'RecordName',
    'strains': "Associated strains",
    'taxa': "Associated taxa",
    'authors': "Authors",
    # 'sequences': "Associated sequences",
    # 'abstract': "Abstract",
    # 'collection': "Collection",
    'doi': "DOI number",
    'editor': "Editor(s)",
    # 'full_reference': "Full reference",
    # 'link': "Hyperlink",
    'isbn': "ISBN",
    'issn': "ISSN",
    'issue': "Issue",
    'journal': "Journal",
    'journal_book': "Journal-Book",
    # 'keywords': "Keywords",
    'first_page': "Page from",
    'last_page': "Page to",
    'publisher': "Publisher",
    'pubmed_id': "PubMed ID",
    'volume': "Volume",
    'year': "Year",
}
REV_PUB_MAPPING = {v: k for k, v in PUB_MAPPING.items()}


def serializer_from_biolomics(ws_data, client=None) -> Publication:
    pub = Publication()

    pub.record_id = ws_data[RECORD_ID]
    pub.record_name = ws_data[RECORD_NAME]
    pub.title = ws_data[RECORD_NAME]
    for field, value in ws_data['RecordDetails'].items():
        value = value['Value']
        if not value:
            continue
        attr = REV_PUB_MAPPING.get(field, None)
        if not attr:
            continue
        if attr in ('year', 'first_page', 'last_page'):
            value = int(value)
        setattr(pub, attr, value)
    return pub


def get_publication_record_name(publication):
    if publication.record_name:
        return publication.record_name
    if publication.title:
        return publication.title
    if publication.pubmed_id:
        return f'PUBMED:{publication.pubmed_id}'
    if publication.doi:
        return f'DOI:{publication.doi}'


def serializer_to_biolomics(publication: Publication, client=None, update=False):
    ws_data = {}
    if publication.record_id:
        ws_data[RECORD_ID] = publication.record_id
    ws_data[RECORD_NAME] = get_publication_record_name(publication)
    details = {}
    for attr, field in PUB_MAPPING.items():
        value = getattr(publication, attr, None)
        if value is None:
            continue
        field_type = 'D' if attr == 'year' else "E"
        details[field] = {'Value': value, 'FieldType': field_type}
    ws_data['RecordDetails'] = details
    return ws_data
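For reference, the serializer turns a `Publication` into the flat Biolomics payload shape driven by `PUB_MAPPING`; a sketch, assuming `Publication` exposes plain attributes as the deserializer above does (the values are made up):

```python
# Sketch of the payload serializer_to_biolomics produces; values are made up.
pub = Publication()
pub.title = 'A study of strains'
pub.year = 2020
ws_data = serializer_to_biolomics(pub)
# ws_data == {'RecordName': 'A study of strains',
#             'RecordDetails': {'Year': {'Value': 2020, 'FieldType': 'D'}}}
```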
66  mirri/biolomics/serializers/growth_media.py  Normal file
@@ -0,0 +1,66 @@
from mirri.biolomics.serializers import RECORD_ID, RECORD_NAME, RECORD_DETAILS
from mirri.entities.growth_medium import GrowthMedium


def serialize_from_biolomics(ws_data, client=None) -> GrowthMedium:
    medium = GrowthMedium()
    medium.record_name = ws_data.get('RecordName', None)
    medium.description = get_growth_medium_record_name(medium)
    medium.record_id = ws_data.get('RecordId', None)
    for key, value in ws_data['RecordDetails'].items():
        value = value['Value']
        if not value:
            continue

        if key == "Full description":
            medium.full_description = value
        if key == "Ingredients":
            medium.ingredients = value
        if key == 'Medium description':
            medium.description = value
        if key == 'Other name':
            medium.other_name = value
        if key == 'pH':
            medium.ph = value
        if key == 'Sterilization conditions':
            medium.sterilization_conditions = value
    return medium


def get_growth_medium_record_name(growth_medium):
    if growth_medium.record_name:
        return growth_medium.record_name
    if growth_medium.description:
        return growth_medium.description
    if growth_medium.acronym:
        return growth_medium.acronym


GROWTH_MEDIUM_MAPPING = {
    'acronym': 'Acronym',
    'full_description': "Full description",
    'ingredients': "Ingredients",
    'description': 'Medium description',
    'other_name': 'Other name',
    'ph': 'pH',
    'sterilization_conditions': 'Sterilization conditions'
}


def serialize_to_biolomics(growth_medium: GrowthMedium, client=None, update=False):
    ws_data = {}
    if growth_medium.record_id:
        ws_data[RECORD_ID] = growth_medium.record_id
    record_name = get_growth_medium_record_name(growth_medium)
    ws_data[RECORD_NAME] = record_name
    details = {}
    for field in growth_medium.fields:
        if field in ('acronym', 'record_id', 'record_name'):
            continue
        value = getattr(growth_medium, field, None)
        if value is not None:
            details[GROWTH_MEDIUM_MAPPING[field]] = {'Value': value, 'FieldType': 'E'}

    ws_data[RECORD_DETAILS] = details
    return ws_data
26  mirri/biolomics/serializers/locality.py  Normal file
@@ -0,0 +1,26 @@
from mirri.entities.location import Location


def serialize_from_biolomics(ws_data, client=None):
    return ws_data


# this is a proof of concept
def serialize_location(location: Location):
    fields = {}
    if location.country:
        fields['Country'] = {'Value': location.country, 'FieldType': 'E'}
    if location.latitude and location.longitude:
        value = {'Latitude': location.latitude,
                 'Longitude': location.longitude}
        if location.coord_uncertainty:
            value['Precision'] = location.coord_uncertainty
        fields['GIS position'] = {'FieldType': 'L', 'Value': value}

    fields['Strains'] = {"FieldType": "RLink", 'Value': [{
        'Name': {'Value': None, 'FieldType': "E"},
        'RecordId': None
    }]}

    return {"RecordDetails": fields,
            "RecordName": location.country}
2  mirri/biolomics/serializers/ontobiotope.py  Normal file
@@ -0,0 +1,2 @@
def serialize_from_biolomics(ws_data, client=None):
    return ws_data
81  mirri/biolomics/serializers/sequence.py  Normal file
@@ -0,0 +1,81 @@
from mirri.entities.sequence import GenomicSequence
from mirri.biolomics.serializers import RECORD_ID, RECORD_NAME, RECORD_DETAILS


class GenomicSequenceBiolomics(GenomicSequence):
    def __init__(self, **kwargs):
        super().__init__(freeze=False, **kwargs)

    @property
    def record_id(self) -> int:
        return self._data.get(RECORD_ID, None)

    @record_id.setter
    def record_id(self, value: int):
        self._data[RECORD_ID] = value

    @property
    def record_name(self) -> str:
        return self._data.get(RECORD_NAME, None)

    @record_name.setter
    def record_name(self, value: str):
        self._data[RECORD_NAME] = value

    def dict(self):
        _data = super(GenomicSequenceBiolomics, self).dict()
        if self.record_id:
            _data[RECORD_ID] = self.record_id
        if self.record_name:
            _data[RECORD_NAME] = self.record_name
        return _data


def serialize_to_biolomics(marker: GenomicSequenceBiolomics, client=None, update=False):
    ws_sequence = {}
    if marker.record_id:
        ws_sequence[RECORD_ID] = marker.record_id
    if marker.record_name:
        ws_sequence[RECORD_NAME] = marker.record_name
    else:
        ws_sequence[RECORD_NAME] = marker.marker_id
    details = {}
    if marker.marker_id:
        details["INSDC number"] = {"Value": marker.marker_id,
                                   "FieldType": "E"}
    if marker.marker_seq:
        details["DNA sequence"] = {
            "Value": {"Sequence": marker.marker_seq},
            "FieldType": "N"}
    if marker.marker_type:
        details['Marker name'] = {"Value": marker.marker_type, "FieldType": "E"}

    ws_sequence[RECORD_DETAILS] = details

    return ws_sequence


MAPPING_WS_SPEC_TYPES = {
    'Beta tubulin': 'TUBB'
}


def serialize_from_biolomics(ws_data, client=None) -> GenomicSequenceBiolomics:
    marker = GenomicSequenceBiolomics()
    marker.record_id = ws_data[RECORD_ID]
    marker.record_name = ws_data[RECORD_NAME]

    for key, value in ws_data['RecordDetails'].items():
        value = value['Value']
        if key == 'INSDC number' and value:
            marker.marker_id = value
        elif key == 'Marker name' and value:
            kind = MAPPING_WS_SPEC_TYPES.get(value, None)
            value = kind if kind else value
            marker.marker_type = value

        elif key == 'DNA sequence' and 'Sequence' in value and value['Sequence']:
            marker.marker_seq = value['Sequence']

    return marker
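A minimal usage sketch for the marker serializer above. The accession number and sequence are made up, and 'ITS' is assumed to be one of the allowed marker types.

from mirri.biolomics.serializers.sequence import (GenomicSequenceBiolomics,
                                                  serialize_to_biolomics)

marker = GenomicSequenceBiolomics()
marker.marker_id = 'AB123456'  # hypothetical INSDC accession
marker.marker_type = 'ITS'     # assumed to be in ALLOWED_MARKER_TYPES
marker.marker_seq = 'ACGTACGT'
ws_sequence = serialize_to_biolomics(marker)
# With no record name set, RecordName falls back to the INSDC number
assert ws_sequence['RecordName'] == 'AB123456'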
462
mirri/biolomics/serializers/strain.py
Normal file
@ -0,0 +1,462 @@
import re
import sys
import pycountry

from mirri import rgetattr, rsetattr
from mirri.entities.date_range import DateRange
from mirri.entities.strain import ORG_TYPES, OrganismType, StrainId, StrainMirri, add_taxon_to_strain
from mirri.biolomics.remote.endoint_names import (GROWTH_MEDIUM_WS, TAXONOMY_WS,
                                                  ONTOBIOTOPE_WS, BIBLIOGRAPHY_WS, SEQUENCE_WS, COUNTRY_WS)
from mirri.settings import (
    ALLOWED_FORMS_OF_SUPPLY,
    NAGOYA_PROBABLY_SCOPE,
    NAGOYA_NO_RESTRICTIONS,
    NAGOYA_DOCS_AVAILABLE,
    NO_RESTRICTION,
    ONLY_RESEARCH,
    COMMERCIAL_USE_WITH_AGREEMENT,
)
from mirri.biolomics.settings import MIRRI_FIELDS
from mirri.utils import get_pycountry

NAGOYA_TRANSLATOR = {
    NAGOYA_NO_RESTRICTIONS: "no known restrictions under the Nagoya protocol",
    NAGOYA_DOCS_AVAILABLE: "documents providing proof of legal access and terms of use available at the collection",
    NAGOYA_PROBABLY_SCOPE: "strain probably in scope, please contact the culture collection",
}
REV_NAGOYA_TRANSLATOR = {v: k for k, v in NAGOYA_TRANSLATOR.items()}

RESTRICTION_USE_TRANSLATOR = {
    NO_RESTRICTION: "no restriction apply",
    ONLY_RESEARCH: "for research use only",
    COMMERCIAL_USE_WITH_AGREEMENT: "for commercial development a special agreement is requested",
}

REV_RESTRICTION_USE_TRANSLATOR = {v: k for k, v in RESTRICTION_USE_TRANSLATOR.items()}

DATE_TYPE_FIELDS = ("Date of collection", "Date of isolation",
                    "Date of deposit", "Date of inclusion in the catalogue")
BOOLEAN_TYPE_FIELDS = ("Strain from a registered collection", "Dual use",
                       "Quarantine in Europe", "Interspecific hybrid")  # , 'GMO')
FILE_TYPE_FIELDS = ("MTA file", "ABS related files")
MAX_MIN_TYPE_FIELDS = ("Tested temperature growth range",
                       "Recommended growth temperature")
LIST_TYPES_TO_JOIN = ('Other denomination', 'Plasmids collections fields', 'Plasmids')

MARKER_TYPE_MAPPING = {
    '16S rRNA': 'Sequences 16s',  # or Sequences c16S rRNA
    'ACT': 'Sequences ACT',
    'CaM': 'Sequences CaM',
    'EF-1α': 'Sequences TEF1a',
    'ITS': 'Sequences ITS',
    'LSU': 'Sequences LSU',
    'RPB1': 'Sequences RPB1',
    'RPB2': 'Sequences RPB2',
    'TUBB': 'Sequences TUB'  # or Sequences Beta tubulin
}


def serialize_to_biolomics(strain: StrainMirri, client=None, update=False,
                           log_fhand=None):  # sourcery no-metrics
    if log_fhand is None:
        log_fhand = sys.stdout
    strain_record_details = {}

    for field in MIRRI_FIELDS:
        try:
            biolomics_field = field["biolomics"]["field"]
            biolomics_type = field["biolomics"]["type"]
        except KeyError:
            # print(f'biolomics not configured: {field["label"]}')
            continue

        label = field["label"]
        attribute = field["attribute"]
        value = rgetattr(strain, attribute, None)
        if value is None:
            continue

        if label == "Accession number":
            value = f"{strain.id.collection} {strain.id.number}"
        if label == "Restrictions on use":
            value = RESTRICTION_USE_TRANSLATOR[value]
        elif label == "Nagoya protocol restrictions and compliance conditions":
            value = NAGOYA_TRANSLATOR[value]
        elif label in FILE_TYPE_FIELDS:
            value = [{"Name": "link", "Value": fname} for fname in value]
        elif label == "Other culture collection numbers":
            value = "; ".join(on.strain_id for on in value) if value else None
        elif label in BOOLEAN_TYPE_FIELDS:
            value = 'yes' if value else 'no'
        elif label == 'GMO':
            value = 'Yes' if value else 'No'
        elif label == "Organism type":
            org_types = [ot.name for ot in value]
            value = []
            for ot in ORG_TYPES.keys():
                is_organism = "yes" if ot in org_types else "no"
                value.append({"Name": ot, "Value": is_organism})
        elif label == 'Taxon name':
            if client:
                taxa = strain.taxonomy.long_name.split(';')
                value = []
                for taxon_name in taxa:
                    taxon = get_remote_rlink(client, TAXONOMY_WS, taxon_name)
                    if taxon:
                        value.append(taxon)
                if not value:
                    msg = f'WARNING: {strain.taxonomy.long_name} not found in database'
                    log_fhand.write(msg + '\n')
                    # TODO: decide to raise or not if taxon not in MIRRI DB
                    # raise ValueError(msg)

        elif label in DATE_TYPE_FIELDS:
            year = value._year
            month = value._month or 1
            day = value._day or 1
            if year is None:
                continue
            value = f"{year}-{month:02}-{day:02}"
        elif label == 'History of deposit':
            value = " < ".join(value)
        elif label in MAX_MIN_TYPE_FIELDS:
            if isinstance(value, (int, float, str)):
                _max, _min = float(value), float(value)
            else:
                _max, _min = float(value['max']), float(value['min'])

            content = {"MaxValue": _max, "MinValue": _min,
                       "FieldType": biolomics_type}
            strain_record_details[biolomics_field] = content
            continue
        elif label in LIST_TYPES_TO_JOIN:
            value = '; '.join(value)
        # TODO: Check how to deal with crossrefs
        elif label == "Recommended medium for growth":
            if client is not None:
                ref_value = []
                for medium in value:
                    ws_gm = client.retrieve_by_name(GROWTH_MEDIUM_WS, medium)
                    if ws_gm is None:
                        raise ValueError(
                            f'Can not find the growth medium: {medium}')
                    gm = {"Name": {"Value": medium, "FieldType": "E"},
                          "RecordId": ws_gm.record_id}
                    ref_value.append(gm)
                value = ref_value
            else:
                continue

        elif label == "Form of supply":
            _value = []
            for form in ALLOWED_FORMS_OF_SUPPLY:
                is_form = "yes" if form in value else "no"
                _value.append({"Name": form, "Value": is_form})
            value = _value
        # print(label, value), biolomics_field
        elif label == "Coordinates of geographic origin":
            value = {'Latitude': strain.collect.location.latitude,
                     'Longitude': strain.collect.location.longitude}
            precision = strain.collect.location.coord_uncertainty
            if precision is not None:
                value['Precision'] = precision
        elif label == "Geographic origin":
            if client is not None and value.country is not None:
                country = get_pycountry(value.country)
                if country is None:
                    log_fhand.write(f'WARNING: {value.country} not a valid country code/name\n')
                else:
                    _value = get_country_record(country, client)
                    if _value is None:  # TODO: Remove this once the countries are added to the DB
                        msg = f'WARNING: {value.country} not in MIRRI DB'
                        log_fhand.write(msg + '\n')
                        # raise ValueError(msg)
                    else:
                        content = {"Value": [_value], "FieldType": "RLink"}
                        strain_record_details['Country'] = content
            _value = []
            for sector in ('state', 'municipality', 'site'):
                sector_val = getattr(value, sector, None)
                if sector_val:
                    _value.append(sector_val)
            value = "; ".join(_value) if _value else None
            if value is None:
                continue

        elif label == "Ontobiotope":
            if client and value:
                onto = get_remote_rlink(client, ONTOBIOTOPE_WS, value)
                value = [onto] if onto is not None else None
        elif label == 'Literature':
            if client and value:
                pub_rlinks = []
                for pub in value:
                    rlink = get_remote_rlink(client, BIBLIOGRAPHY_WS, pub.title)
                    if rlink:
                        pub_rlinks.append(rlink)
                if pub_rlinks:
                    value = pub_rlinks
                else:
                    continue

        elif label == 'Ploidy':
            value = _translate_polidy(value)
        if value is not None:
            content = {"Value": value, "FieldType": biolomics_type}
            strain_record_details[biolomics_field] = content

    # if False:
    #     record_details["Data provided by"] = {
    #         "Value": strain.id.collection, "FieldType": "V"}

    # Markers
    if client:
        add_markers_to_strain_details(client, strain, strain_record_details)

    strain_structure = {"RecordDetails": strain_record_details}
    if update:
        strain_structure['RecordId'] = strain.record_id
        strain_structure['RecordName'] = strain.record_name
    else:
        strain_structure["Acronym"] = "MIRRI"

    return strain_structure


def add_markers_to_strain_details(client, strain: StrainMirri, details):
    for marker in strain.genetics.markers:
        marker_name = marker.marker_id
        marker_in_ws = client.retrieve_by_name(SEQUENCE_WS, marker_name)
        if marker_in_ws is None:
            print('Marker not in web service')
            continue
        marker_type = marker.marker_type
        ws_marker = {
            "Value": [{
                "Name": {"Value": marker_in_ws.record_name,
                         "FieldType": "E"},
                "RecordId": marker_in_ws.record_id
            }],
            "FieldType": "NLink"
        }
        if marker_in_ws.marker_seq:
            ws_marker['Value'][0]["TargetFieldValue"] = {
                "Value": {"Sequence": marker_in_ws.marker_seq},
                "FieldType": "N"
            }

        details[MARKER_TYPE_MAPPING[marker_type]] = ws_marker


def get_remote_rlink(client, endpoint, record_name):
    entity = client.retrieve_by_name(endpoint, record_name)
    if entity:
        # some endpoints do not serialize the json into a python object yet
        try:
            record_name = entity.record_name
            record_id = entity.record_id
        except AttributeError:
            record_name = entity["RecordName"]
            record_id = entity["RecordId"]
        return {"Name": {"Value": record_name, "FieldType": "E"},
                "RecordId": record_id}


def add_strain_rlink_to_entity(record, strain_id, strain_name):
    field_strain = {
        "FieldType": "RLink",
        'Value': [{
            'Name': {'Value': strain_name, 'FieldType': "E"},
            'RecordId': strain_id
        }]
    }
    record['RecordDetails']['Strains'] = field_strain
    return record


PLOIDY_TRANSLATOR = {
    0: 'Aneuploid',
    1: 'Haploid',
    2: 'Diploid',
    3: 'Triploid',
    4: 'Tetraploid',
    9: 'Polyploid'
}

REV_PLOIDY_TRANSLATOR = {v: k for k, v in PLOIDY_TRANSLATOR.items()}


def _translate_polidy(ploidy):
    # print('ploidy in serializer', ploidy)
    try:
        ploidy = int(ploidy)
    except (TypeError, ValueError):
        return '?'
    try:
        ploidy = PLOIDY_TRANSLATOR[ploidy]
    except KeyError:
        ploidy = 'Polyploid'
    return ploidy


def serialize_from_biolomics(biolomics_strain, client=None):  # sourcery no-metrics
    strain = StrainMirri()
    strain.record_id = biolomics_strain.get('RecordId', None)
    strain.record_name = biolomics_strain.get('RecordName', None)
    for field in MIRRI_FIELDS:
        try:
            biolomics_field = field["biolomics"]["field"]
        except KeyError:
            # print(f'biolomics not configured: {field["label"]}')
            continue

        label = field["label"]
        attribute = field["attribute"]
        field_data = biolomics_strain['RecordDetails'].get(biolomics_field, None)
        if field_data is None:
            continue
        is_empty = field_data.get('IsEmpty')
        if is_empty:
            continue
        if biolomics_field in ('Tested temperature growth range', 'Recommended growth temperature'):
            value = {'max': field_data.get('MaxValue', None),
                     'min': field_data.get('MinValue', None)}
        else:
            value = field_data['Value']
        # if value in (None, '', [], {}, '?', 'Unknown', 'nan', 'NaN'):
        #     continue

        # print(label, attribute, biolomics_field, value)

        if label == 'Accession number':
            number = strain.record_name
            mirri_id = StrainId(number=number)
            strain.synonyms = [mirri_id]
            coll, num = value.split(' ', 1)
            accession_number_id = StrainId(collection=coll, number=num)
            strain.id = accession_number_id
            continue
        elif label == "Restrictions on use":
            value = REV_RESTRICTION_USE_TRANSLATOR[value]
        elif label == 'Nagoya protocol restrictions and compliance conditions':
            value = REV_NAGOYA_TRANSLATOR[value]
        elif label in FILE_TYPE_FIELDS:
            value = [f['Value'] for f in value]
        elif label == "Other culture collection numbers":
            other_numbers = []
            for on in value.split(";"):
                on = on.strip()
                try:
                    collection, number = on.split(" ", 1)
                except ValueError:
                    collection = None
                    number = on
                _id = StrainId(collection=collection, number=number)
                other_numbers.append(_id)
            value = other_numbers
        elif label in BOOLEAN_TYPE_FIELDS:
            value = value == 'yes'
        elif label == 'GMO':
            value = value == 'Yes'
        elif label == "Organism type":
            organism_types = [OrganismType(item['Name']) for item in value if item['Value'] == 'yes']
            if organism_types:
                value = organism_types
        elif label == 'Taxon name':
            value = ";".join([v['Name']['Value'] for v in value])
            add_taxon_to_strain(strain, value)
            continue

        elif label in DATE_TYPE_FIELDS:
            # date_range = DateRange()
            value = DateRange().strpdate(value)

        elif label in ("Recommended growth temperature",
                       "Tested temperature growth range"):
            if (value['max'] is None or value['max'] == 0 or
                    value['min'] is None or value['min'] == 0):
                continue
        elif label == "Recommended medium for growth":
            value = [v['Name']['Value'] for v in value]
        elif label == "Form of supply":
            value = [item['Name'] for item in value if item['Value'] == 'yes']
        elif label in LIST_TYPES_TO_JOIN:
            value = [v.strip() for v in value.split(";")]
        elif label == "Coordinates of geographic origin":
            if ('Longitude' in value and 'Latitude' in value and
                    isinstance(value['Longitude'], float) and
                    isinstance(value['Latitude'], float)):
                strain.collect.location.longitude = value['Longitude']
                strain.collect.location.latitude = value['Latitude']
                if value.get('Precision', 0) != 0:
                    strain.collect.location.coord_uncertainty = value['Precision']
            continue
        elif label == "Altitude of geographic origin":
            value = float(value)
        elif label == "Geographic origin":
            strain.collect.location.site = value
            continue
        elif label == 'Ontobiotope':
            try:
                value = re.search("(OBT:[0-9]{5,7})", value[0]['Name']['Value']).group()
            except (KeyError, IndexError, AttributeError):
                continue

        elif label == 'Ploidy':
            value = REV_PLOIDY_TRANSLATOR[value]
        elif label == 'Literature':
            if client is not None:
                pubs = []
                for pub in value:
                    pub = client.retrieve_by_id(BIBLIOGRAPHY_WS, pub['RecordId'])
                    pubs.append(pub)
                value = pubs

        rsetattr(strain, attribute, value)
    # fields that are not in the MIRRI_FIELDS list
    # country
    if 'Country' in biolomics_strain['RecordDetails'] and biolomics_strain['RecordDetails']['Country']:
        try:
            country_name = biolomics_strain['RecordDetails']['Country']['Value'][0]['Name']['Value']
            country = get_pycountry(country_name)
            country_3 = country.alpha_3 if country else None
        except (IndexError, KeyError):
            country_3 = None
        if country_3:
            strain.collect.location.country = country_3
    # Markers:
    if client:
        markers = []
        for marker_type, biolomics_marker in MARKER_TYPE_MAPPING.items():
            try:
                marker_value = biolomics_strain['RecordDetails'][biolomics_marker]['Value']
            except KeyError:
                continue
            if not marker_value:
                continue

            for marker in marker_value:
                record_id = marker['RecordId']
                marker = client.retrieve_by_id(SEQUENCE_WS, record_id)
                if marker is not None:
                    markers.append(marker)
        if markers:
            strain.genetics.markers = markers

    return strain


def get_country_record(country, client):
    for attr in ('common_name', 'name', 'official_name'):
        val = getattr(country, attr, None)
        if val is not None:
            _value = get_remote_rlink(client, COUNTRY_WS, val)
            if _value is not None:
                return _value
    return None
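A quick sanity sketch of the ploidy helpers above; the module path is assumed and _translate_polidy is a private helper of this file.

from mirri.biolomics.serializers.strain import (_translate_polidy,
                                                REV_PLOIDY_TRANSLATOR)  # assumed path

assert _translate_polidy(2) == 'Diploid'
assert _translate_polidy(None) == '?'        # non-numeric input
assert _translate_polidy(7) == 'Polyploid'   # unmapped counts collapse to Polyploid
assert REV_PLOIDY_TRANSLATOR['Haploid'] == 1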
64
mirri/biolomics/serializers/taxonomy.py
Normal file
@ -0,0 +1,64 @@
from mirri.entities.strain import Taxonomy

# TODO this is all wrong, needs deep revision


class TaxonomyMirri(Taxonomy):
    fields = ['record_id', 'record_name', 'acronym', 'full_description',
              'ingredients', 'description', 'other_name', 'ph',
              'sterilization_conditions']

    def __init__(self, **kwargs):
        self._data = {}
        for field in self.fields:
            if field in kwargs and kwargs[field] is not None:
                value = kwargs[field]
                setattr(self, field, value)

    def __setattr__(self, attr, value):
        if attr == '_data':
            super().__setattr__(attr, value)
            return
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        self._data[attr] = value

    def __getattr__(self, attr):
        if attr == '_data':
            raise AttributeError(attr)
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        return self._data.get(attr, None)

    def dict(self):
        return self._data


def serialize_from_biolomics(ws_data, client=None) -> TaxonomyMirri:
    return ws_data
    # The code below is unreachable: it is a leftover copied from the growth
    # medium serializer, kept only until the revision flagged above.
    medium = GrowthMedium()
    medium.record_name = ws_data.get('RecordName', None)
    medium.record_id = ws_data.get('RecordId', None)
    for key, value in ws_data['RecordDetails'].items():
        value = value['Value']
        if not value:
            continue

        if key == "Full description":
            medium.full_description = value
        if key == "Ingredients":
            medium.ingredients = value
        if key == 'Medium description':
            medium.description = value
        if key == 'Other name':
            medium.other_name = value
        if key == 'pH':
            medium.ph = value
        if key == 'Sterilization conditions':
            medium.sterilization_conditions = value

    return medium
373
mirri/biolomics/settings.py
Normal file
@ -0,0 +1,373 @@
try:
    from mirri.biolomics.secrets import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
except ImportError:
    raise ImportError(
        'You need a secrets.py in the project dir with CLIENT_ID, SECRET_ID, USERNAME and PASSWORD')

MIRRI_FIELDS = [
    {
        "attribute": "id",
        "label": "Accession number",
        "mandatory": True,
        "biolomics": {"field": "Collection accession number", "type": "E"},
    },
    {
        "attribute": "restriction_on_use",
        "label": "Restrictions on use",
        "mandatory": True,
        "biolomics": {"field": "Restrictions on use", "type": "T"},
    },
    {
        "attribute": "nagoya_protocol",
        "label": "Nagoya protocol restrictions and compliance conditions",
        "mandatory": True,
        "biolomics": {"field": "Nagoya protocol restrictions and compliance conditions", "type": "T"},
    },
    {
        "attribute": "abs_related_files",
        "label": "ABS related files",
        "mandatory": False,
        "biolomics": {"field": "ABS related files", "type": "U"},
    },
    {
        "attribute": "mta_files",
        "label": "MTA file",
        "mandatory": False,
        "biolomics": {"field": "MTA files URL", "type": "U"},
    },
    {
        "attribute": "other_numbers",
        "label": "Other culture collection numbers",
        "mandatory": False,
        "biolomics": {"field": "Other culture collection numbers", "type": "E"},
    },
    {
        "attribute": "is_from_registered_collection",
        "label": "Strain from a registered collection",
        "mandatory": False,
        "biolomics": {"field": "Strain from a registered collection", "type": "T"},
    },
    {
        "attribute": "risk_group",
        "label": "Risk Group",
        "mandatory": True,
        "biolomics": {"field": "Risk group", "type": "T"},
    },
    {
        "attribute": "is_potentially_harmful",
        "label": "Dual use",
        "mandatory": False,
        "biolomics": {"field": "Dual use", "type": "T"},
    },
    {
        "attribute": "is_subject_to_quarantine",
        "label": "Quarantine in Europe",
        "mandatory": False,
        "biolomics": {"field": "Quarantine in Europe", "type": "T"},
    },
    {
        "attribute": "taxonomy.organism_type",
        "label": "Organism type",
        "mandatory": True,
        "biolomics": {"field": "Organism type", "type": "C"},
    },
    {
        "attribute": "taxonomy.long_name",
        "label": "Taxon name",
        "mandatory": True,
        "biolomics": {"field": "Taxon name", "type": "SynLink"},
    },
    {
        "attribute": "taxonomy.infrasubspecific_name",
        "label": "Infrasubspecific names",
        "mandatory": False,
        "biolomics": {"field": "Infrasubspecific names", "type": "E"},
    },
    {
        "attribute": "taxonomy.comments",
        "label": "Comment on taxonomy",
        "mandatory": False,
        "biolomics": {"field": "Comment on taxonomy", "type": "E"},
    },
    {
        "attribute": "taxonomy.interspecific_hybrid",
        "label": "Interspecific hybrid",
        "mandatory": False,
        "biolomics": {"field": "Interspecific hybrid", "type": "T"},
    },
    {
        "attribute": "status", "label": "Status", "mandatory": False,
        "biolomics": {"field": "Status", "type": "E"},
    },
    {
        "attribute": "history",
        "label": "History of deposit",
        "mandatory": False,
        "biolomics": {"field": "History", "type": "E"},
    },
    {
        "attribute": "deposit.who",
        "label": "Depositor",
        "mandatory": False,
        "biolomics": {"field": "Depositor", "type": "E"},
    },
    {
        "attribute": "deposit.date",
        "label": "Date of deposit",
        "mandatory": False,
        "biolomics": {"field": "Deposit date", "type": "H"},
    },
    {
        "attribute": "catalog_inclusion_date",
        "label": "Date of inclusion in the catalogue",
        "mandatory": False,
        "biolomics": {"field": "Date of inclusion in the catalogue", "type": "H"},
    },
    {
        "attribute": "collect.who",
        "label": "Collected by",
        "mandatory": False,
        "biolomics": {"field": "Collector", "type": "E"},
    },
    {
        "attribute": "collect.date",
        "label": "Date of collection",
        "mandatory": False,
        "biolomics": {"field": "Collection date", "type": "H"},
    },
    {
        "attribute": "isolation.who",
        "label": "Isolated by",
        "mandatory": False,
        "biolomics": {"field": "Isolator", "type": "E"},
    },
    {
        "attribute": "isolation.date",
        "label": "Date of isolation",
        "mandatory": False,
        "biolomics": {"field": "Isolation date", "type": "H"},
    },
    {
        "attribute": "isolation.substrate_host_of_isolation",
        "label": "Substrate/host of isolation",
        "mandatory": False,
        "biolomics": {"field": "Substrate of isolation", "type": "E"},
    },
    {
        "attribute": "growth.tested_temp_range",
        "label": "Tested temperature growth range",
        "mandatory": False,
        "biolomics": {"field": "Tested temperature growth range", "type": "S"},
    },
    {
        "attribute": "growth.recommended_temp",
        "label": "Recommended growth temperature",
        "mandatory": True,
        "biolomics": {"field": "Recommended growth temperature", "type": "S"},
    },
    {
        "attribute": "growth.recommended_media",
        "label": "Recommended medium for growth",
        "mandatory": True,
        "biolomics": {"field": "Recommended growth medium", "type": "RLink"},
    },
    {
        "attribute": "form_of_supply",
        "label": "Form of supply",
        "mandatory": True,
        "biolomics": {"field": "Form", "type": "C"},
    },
    {
        "attribute": "other_denominations",
        "label": "Other denomination",
        "mandatory": False,
        "biolomics": {"field": "Other denomination", "type": "E"},
    },
    {
        # here we use latitude to check if there is data in some of the fields
        "attribute": "collect.location.latitude",
        "label": "Coordinates of geographic origin",
        "mandatory": False,
        "biolomics": {"field": "Coordinates of geographic origin", "type": "L"},
    },
    {
        "attribute": "collect.location.altitude",
        "label": "Altitude of geographic origin",
        "mandatory": False,
        "biolomics": {"field": "Altitude of geographic origin", "type": "D"},
    },
    {
        "attribute": "collect.location",
        "label": "Geographic origin",
        "mandatory": True,
        "biolomics": {"field": "Geographic origin", "type": "E"},
    },
    {
        "attribute": "collect.habitat",
        "label": "Isolation habitat",
        "mandatory": False,
        "biolomics": {"field": "Isolation habitat", "type": "E"},
    },
    # {
    #     "attribute": "collect.habitat_ontobiotope",
    #     "label": "Ontobiotope term for the isolation habitat",
    #     "mandatory": False,
    #     "biolomics": {"field": "Ontobiotope term for the isolation habitat", "type": "E"},
    # },
    {
        "attribute": "collect.habitat_ontobiotope",
        "label": "Ontobiotope",
        "mandatory": False,
        "biolomics": {"field": "Ontobiotope", "type": "RLink"},
    },
    {
        "attribute": "genetics.gmo", "label": "GMO", "mandatory": False,
        "biolomics": {"field": "GMO", "type": "V"},
    },
    {
        "attribute": "genetics.gmo_construction",
        "label": "GMO construction information",
        "mandatory": False,
        "biolomics": {"field": "GMO construction information", "type": "E"},
    },
    {
        "attribute": "genetics.mutant_info",
        "label": "Mutant information",
        "mandatory": False,
        "biolomics": {"field": "Mutant information", "type": "E"},
    },
    {
        "attribute": "genetics.genotype",
        "label": "Genotype",
        "mandatory": False,
        "biolomics": {"field": "Genotype", "type": "E"},
    },
    {
        "attribute": "genetics.sexual_state",
        "label": "Sexual state",
        "mandatory": False,
        "biolomics": {"field": "Sexual state", "type": "E"},
    },
    {
        "attribute": "genetics.ploidy",
        "label": "Ploidy",
        "mandatory": False,
        "biolomics": {"field": "Ploidy", "type": "T"},
    },
    {
        "attribute": "genetics.plasmids",
        "label": "Plasmids",
        "mandatory": False,
        "biolomics": {"field": "Plasmids", "type": "E"},
    },
    {
        "attribute": "genetics.plasmids_in_collections",
        "label": "Plasmids collections fields",
        "mandatory": False,
        "biolomics": {"field": "Plasmids collections fields", "type": "E"},
    },
    {
        "attribute": "publications",
        "label": "Literature",
        "mandatory": False,
        "biolomics": {"field": "Literature", "type": "RLink"},
    },
    {
        "attribute": "pathogenicity",
        "label": "Pathogenicity",
        "mandatory": False,
        "biolomics": {"field": "Pathogenicity", "type": "E"},
    },
    {
        "attribute": "enzyme_production",
        "label": "Enzyme production",
        "mandatory": False,
        "biolomics": {"field": "Enzyme production", "type": "E"},
    },
    {
        "attribute": "production_of_metabolites",
        "label": "Production of metabolites",
        "mandatory": False,
        "biolomics": {"field": "Metabolites production", "type": "E"},
    },
    {
        "attribute": "applications",
        "label": "Applications",
        "mandatory": False,
        "biolomics": {"field": "Applications", "type": "E"},
    },
    {
        "attribute": "remarks", "label": "Remarks", "mandatory": False,
        "biolomics": {"field": "Remarks", "type": "E"},
    },
    {
        "attribute": "literature_linked_to_the_sequence_genome",
        "label": "Literature linked to the sequence/genome",
        "mandatory": False,
        # "biolomics": {"field": "MTA files URL", "type": "U"},
    },
]


PUB_MIRRI_FIELDS = [
    {
        "attribute": "pub_id", "mandatory": False,
        "biolomics": {"field": "", "type": "E"},
    },
    {
        "attribute": "pubmed_id", "mandatory": False,
        "biolomics": {"field": "PubMed ID", "type": "E"},
    },
    {
        "attribute": "doi", "mandatory": False,
        "biolomics": {"field": "DOI number", "type": "E"},
    },
    {
        "attribute": "title", "mandatory": False,
        "biolomics": {"field": "Title", "type": "E"},
    },
    {
        "attribute": "authors", "mandatory": False,
        "biolomics": {"field": "Authors", "type": "E"},
    },
    {
        "attribute": "journal", "mandatory": False,
        "biolomics": {"field": "Journal", "type": "E"},
    },
    {
        "attribute": "volume", "mandatory": False,
        "biolomics": {"field": "Volume", "type": "E"},
    },
    {
        "attribute": "issue", "mandatory": False,
        "biolomics": {"field": "Issue", "type": "E"},
    },
    {
        "attribute": "first_page", "mandatory": False,
        "biolomics": {"field": "Page from", "type": "E"},
    },
    {
        "attribute": "last_page", "mandatory": False,
        "biolomics": {"field": "Page to", "type": "E"},
    },
    {
        "attribute": "last_page", "label": "", "mandatory": False,
        "biolomics": {"field": "", "type": "E"},
    },
    {
        "attribute": "last_page", "label": "", "mandatory": False,
        "biolomics": {"field": "", "type": "E"},
    },
    {
        "attribute": "book_title", "label": "", "mandatory": False,
        "biolomics": {"field": "Book title", "type": "E"},
    },
    {
        "attribute": "publisher", "label": "", "mandatory": False,
        "biolomics": {"field": "Publisher", "type": "E"},
    },
    {
        "attribute": "editor", "label": "", "mandatory": False,
        "biolomics": {"field": "Editor(s)", "type": "E"},
    },
]
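The serializers above drive everything from MIRRI_FIELDS; a field's Biolomics name and type can be looked up by its MIRRI label like this. Note the import requires the secrets.py described at the top of this file.

from mirri.biolomics.settings import MIRRI_FIELDS

by_label = {field['label']: field for field in MIRRI_FIELDS if 'label' in field}
taxon_field = by_label['Taxon name']
print(taxon_field['biolomics']['field'])  # Taxon name
print(taxon_field['biolomics']['type'])   # SynLink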
3603
mirri/data/ontobiotopes.csv
Normal file
File diff suppressed because it is too large
0
mirri/entities/__init__.py
Normal file
45
mirri/entities/_private_classes.py
Normal file
@ -0,0 +1,45 @@
class FrozenClass(object):
    __isfrozen = False

    def __setattr__(self, key, value):
        # print(dir(self))
        if self.__isfrozen and not hasattr(self, key):
            msg = f"Can not add {key} to {self.__class__.__name__}. It is not one of its attributes"
            raise TypeError(msg)
        object.__setattr__(self, key, value)

    def _freeze(self):
        self.__isfrozen = True


class _FieldBasedClass(FrozenClass):
    _fields = []

    def __init__(self, data=None, freeze=True):
        self._data = {}
        if data is None:
            data = {}
        for field in self._fields:
            value = data.get(field["label"], None)
            setattr(self, field["attribute"], value)
        if freeze:
            self._freeze()

    def __eq__(self, o: object) -> bool:
        for field in self._fields:
            val1 = getattr(self, field["attribute"], None)
            val2 = getattr(o, field["attribute"], None)
            if val1 != val2:
                return False
        return True

    def __bool__(self):
        return bool(self.dict())

    def dict(self):
        data = {}
        for field in self._fields:
            value = getattr(self, field["attribute"])
            if value is not None:
                data[field["label"]] = value
        return data
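A minimal sketch of how _FieldBasedClass is meant to be subclassed; the Contact entity is hypothetical, for illustration only.

from mirri.entities._private_classes import _FieldBasedClass


class Contact(_FieldBasedClass):
    _fields = [
        {"attribute": "name", "label": "Name"},
        {"attribute": "email", "label": "Email"},
    ]


contact = Contact({"Name": "Jane Doe"})
print(contact.dict())            # {'Name': 'Jane Doe'}
try:
    contact.phone = '555-0100'   # not one of _fields
except TypeError as error:
    print(error)                 # frozen instances reject unknown attributes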
87
mirri/entities/date_range.py
Normal file
@ -0,0 +1,87 @@
from calendar import monthrange
from collections import OrderedDict
from copy import copy
from datetime import date


class DateRange:
    def __init__(self, year=None, month=None, day=None):
        self._year = year
        if month is not None and (month < 1 or month > 12):
            raise ValueError("Month must be between 1 and 12")
        self._month = month
        if day is not None and (day < 1 or day > 31):
            raise ValueError("Day must be between 1 and 31")
        self._day = day

        self._start = None
        self._end = None
        if year or month or day:
            self._create_range()

    def __str__(self):
        _strdate = self.strfdate
        if _strdate is None:
            return ""

        return _strdate

    def __bool__(self):
        return bool(self._year or self._month or self._day)

    def _create_range(self):
        year = self._year
        month = self._month
        day = self._day
        if year and month and day:
            start_date = date(year=year, month=month, day=day)
            end_date = date(year=year, month=month, day=day)
        elif month is None:
            start_date = date(year=year, month=1, day=1)
            end_date = date(year=year, month=12, day=31)
        elif day is None:
            month_last_day = monthrange(year, month)[1]
            start_date = date(year=year, month=month, day=1)
            end_date = date(year=year, month=month, day=month_last_day)

        self._start = start_date
        self._end = end_date

    def strpdate(self, date_str: str):
        date_str = str(date_str)
        orig_date = copy(date_str)
        date_str = date_str.replace("/", "").replace("-", "")
        if len(date_str) > 8:
            msg = f"Malformed date, more characters than expected: {orig_date}"
            raise ValueError(msg)
        year = None
        month = None
        day = None
        if len(date_str) >= 4:
            year = int(date_str[:4])
            if len(date_str) >= 6:
                month = int(date_str[4:6])
                if month < 1 or month > 12:
                    raise ValueError("Month must be between 1 and 12")
                if len(date_str) >= 8:
                    day = int(date_str[6:8])
                    if day is not None and (day < 1 or day > 31):
                        raise ValueError("Day must be between 1 and 31")
        self._year = year
        self._month = month
        self._day = day
        if year or month or day:
            self._create_range()
        return self

    @property
    def strfdate(self):
        year = "----" if self._year is None else f"{self._start.year:04}"
        month = "--" if self._month is None else f"{self._start.month:02}"
        day = "--" if self._day is None else f"{self._start.day:02}"
        _date = str(f"{year}{month}{day}")
        if _date == "--------":
            return None
        return _date

    @property
    def range(self):
        return OrderedDict([("start", self._start), ("end", self._end)])
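A short usage sketch of the incomplete-date handling above: a year-and-month string expands to a range covering the whole month.

from mirri.entities.date_range import DateRange

collected = DateRange().strpdate('2020-06')
print(collected.strfdate)        # 202006--
print(collected.range['start'])  # 2020-06-01
print(collected.range['end'])    # 2020-06-30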
47
mirri/entities/growth_medium.py
Normal file
@ -0,0 +1,47 @@
class GrowthMedium:
    fields = ['record_id', 'record_name', 'acronym', 'full_description',
              'ingredients', 'description', 'other_name', 'ph',
              'sterilization_conditions']

    def __init__(self, **kwargs):
        self._data = {}
        for field in self.fields:
            if field in kwargs and kwargs[field] is not None:
                value = kwargs[field]
                setattr(self, field, value)

    def __setattr__(self, attr, value):
        if attr == '_data':
            super().__setattr__(attr, value)
            return
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        self._data[attr] = value

    def __getattr__(self, attr):
        if attr == '_data':
            raise AttributeError(attr)
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        return self._data.get(attr, None)

    def dict(self):
        return self._data

    def update(self, growth_media, include_fields=None):
        for field in self.fields:
            if include_fields and field in include_fields:
                new_value = getattr(growth_media, field, None)
                actual_value = getattr(self, field, None)
                if new_value is not None and new_value != actual_value:
                    setattr(self, field, new_value)

    def is_equal(self, other, exclude_fields=None):
        exclude_fields = exclude_fields or []
        for field in self.fields:
            if field in exclude_fields:
                continue
            value_of_other = getattr(other, field, None)
            value_of_self = getattr(self, field, None)
            if value_of_self is not None and value_of_self != value_of_other:
                return False
        return True
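A small sketch of update() and is_equal() on two media that differ only in pH; the names and values are made up.

from mirri.entities.growth_medium import GrowthMedium

gm_old = GrowthMedium()
gm_old.record_name = 'Medium 1'  # hypothetical medium
gm_old.ph = 7.0

gm_new = GrowthMedium()
gm_new.record_name = 'Medium 1'
gm_new.ph = 6.5

print(gm_old.is_equal(gm_new, exclude_fields=['ph']))  # True: only ph differs
gm_old.update(gm_new, include_fields=['ph'])
print(gm_old.ph)                                       # 6.5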
170
mirri/entities/location.py
Normal file
@ -0,0 +1,170 @@
from __future__ import annotations
import hashlib
from typing import Union

from mirri.entities._private_classes import _FieldBasedClass
from mirri.settings import (
    ALTITUDE,
    COORD_SPATIAL_REFERENCE,
    COORDUNCERTAINTY,
    COUNTRY,
    GEOREF_METHOD,
    ISLAND,
    LATITUDE,
    LONGITUDE,
    MUNICIPALITY,
    OTHER,
    PROVINCE,
    SITE,
    STATE,
)
import pycountry


class Location(_FieldBasedClass):
    _fields = [
        {"attribute": "country", "label": COUNTRY},
        {"attribute": "state", "label": STATE},
        {"attribute": "province", "label": PROVINCE},
        {"attribute": "municipality", "label": MUNICIPALITY},
        {"attribute": "site", "label": SITE},
        {"attribute": "other", "label": OTHER},
        {"attribute": "island", "label": ISLAND},
        {"attribute": "longitude", "label": LONGITUDE},
        {"attribute": "latitude", "label": LATITUDE},
        {"attribute": "altitude", "label": ALTITUDE},
        {"attribute": "coord_spatial_reference", "label": COORD_SPATIAL_REFERENCE},
        {"attribute": "coord_uncertainty", "label": COORDUNCERTAINTY},
        {"attribute": "georef_method", "label": GEOREF_METHOD},
    ]

    def __str__(self):
        _site = []
        if self.country:
            _site.append(self.country)
        if self.province:
            _site.append(self.province)
        if self.site:
            _site.append(self.site)
        if self.municipality:
            _site.append(self.municipality)

        return ": ".join(_site)

    def __hash__(self):
        hash_str = ''
        for field in self._fields:
            value = str(getattr(self, field["attribute"], None))
            hash_str += value
        # hash_str = str(self.country) + str(self.province) + str(self.municipality) + str(self.site)
        return int(hashlib.sha1(hash_str.encode("utf-8")).hexdigest(), 16) % (10 ** 8)

    @property
    def country(self) -> Union[str, None]:
        return self._data.get(COUNTRY, None)

    @country.setter
    def country(self, code3: str):
        if code3 is not None:
            _country = pycountry.countries.get(alpha_3=code3)
            if _country is None:
                _country = pycountry.historic_countries.get(alpha_3=code3)
            if _country is None and code3 != 'INW':
                raise ValueError(f'{code3}, not a valid 3 letter country name')
        self._data[COUNTRY] = code3

    @property
    def province(self) -> Union[str, None]:
        return self._data.get(PROVINCE, None)

    @province.setter
    def province(self, code3: str):
        self._data[PROVINCE] = code3

    @property
    def municipality(self) -> Union[str, None]:
        return self._data.get(MUNICIPALITY, None)

    @municipality.setter
    def municipality(self, name: str):
        self._data[MUNICIPALITY] = name

    @property
    def site(self) -> Union[str, None]:
        return self._data.get(SITE, None)

    @site.setter
    def site(self, name: str):
        self._data[SITE] = name

    @property
    def latitude(self):
        return self._data.get(LATITUDE, None)

    @latitude.setter
    def latitude(self, latitude: float):
        self._data[LATITUDE] = latitude

    @property
    def longitude(self) -> Union[float, None]:
        return self._data.get(LONGITUDE, None)

    @longitude.setter
    def longitude(self, longitude: float):
        self._data[LONGITUDE] = longitude

    @property
    def altitude(self) -> Union[int, float, None]:
        return self._data.get(ALTITUDE, None)

    @altitude.setter
    def altitude(self, altitude: Union[int, float]):
        self._data[ALTITUDE] = altitude

    @property
    def georef_method(self) -> Union[str, None]:
        return self._data.get(GEOREF_METHOD, None)

    @georef_method.setter
    def georef_method(self, georef_method: str):
        self._data[GEOREF_METHOD] = georef_method

    @property
    def coord_uncertainty(self) -> Union[str, None]:
        return self._data.get(COORDUNCERTAINTY, None)

    @coord_uncertainty.setter
    def coord_uncertainty(self, coord_uncertainty: str):
        self._data[COORDUNCERTAINTY] = coord_uncertainty

    @property
    def coord_spatial_reference(self) -> Union[str, None]:
        return self._data.get(COORD_SPATIAL_REFERENCE, None)

    @coord_spatial_reference.setter
    def coord_spatial_reference(self, coord_spatial_reference: str):
        self._data[COORD_SPATIAL_REFERENCE] = coord_spatial_reference

    @property
    def state(self) -> Union[str, None]:
        return self._data.get(STATE, None)

    @state.setter
    def state(self, state):
        self._data[STATE] = state

    @property
    def island(self) -> Union[str, None]:
        return self._data.get(ISLAND, None)

    @island.setter
    def island(self, island):
        self._data[ISLAND] = island

    @property
    def other(self) -> Union[str, None]:
        return self._data.get(OTHER, None)

    @other.setter
    def other(self, other):
        self._data[OTHER] = other
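A brief usage sketch of Location; the place names are illustrative.

from mirri.entities.location import Location

location = Location()
location.country = 'PRT'   # must be a valid ISO alpha-3 code
location.municipality = 'Braga'
print(str(location))       # PRT: Braga
print(location.dict())     # keyed by the labels from mirri.settings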
202
mirri/entities/publication.py
Normal file
@ -0,0 +1,202 @@
from mirri.settings import (BOOK_EDITOR, BOOK_PUBLISHER, BOOK_TITLE,
                            PUB_AUTHORS, PUB_DOI, PUB_FIRST_PAGE, PUB_ID,
                            PUB_ISSUE, PUB_JOURNAL, PUB_LAST_PAGE,
                            PUB_PUBMED_ID, PUB_TITLE, PUB_VOLUME)

# Maybe we could implement some crossref calls to fill all field data
# and get the DOI wherever possible

RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'


class Publication:
    def __init__(self, data=None):
        self._data = {}
        if data:
            self.record_id = data.get('RecordId', None)
            self.record_name = data.get('RecordName', None)
            self.pubmed_id = data.get(PUB_PUBMED_ID, None)
            self.doi = data.get(PUB_DOI, None)
            self.title = data.get(PUB_TITLE, None)
            self.authors = data.get(PUB_AUTHORS, None)
            self.journal = data.get(PUB_JOURNAL, None)
            self.volume = data.get(PUB_VOLUME, None)
            self.issue = data.get(PUB_ISSUE, None)
            self.first_page = data.get(PUB_FIRST_PAGE, None)
            self.last_page = data.get(PUB_LAST_PAGE, None)
            self.editors = data.get(BOOK_EDITOR, None)
            self.publisher = data.get(BOOK_PUBLISHER, None)
            self.book_title = data.get(BOOK_TITLE, None)
            self.isbn = data.get('ISBN', None)
            self.issn = data.get('ISSN', None)
            self.year = data.get('Year', None)

    def __bool__(self):
        return bool(self._data)

    def dict(self):
        return self._data

    @property
    def id(self) -> int:
        return self._data.get(PUB_ID, None)

    @id.setter
    def id(self, value: int):
        if value is not None:
            self._data[PUB_ID] = value

    @property
    def record_id(self) -> int:
        return self._data.get(RECORD_ID, None)

    @record_id.setter
    def record_id(self, value: int):
        if value is not None:
            self._data[RECORD_ID] = value

    @property
    def record_name(self) -> str:
        return self._data.get(RECORD_NAME, None)

    @record_name.setter
    def record_name(self, value: str):
        if value is not None:
            self._data[RECORD_NAME] = value

    @property
    def pubmed_id(self):
        return self._data.get(PUB_PUBMED_ID, None)

    @pubmed_id.setter
    def pubmed_id(self, value: str):
        if value is not None:
            self._data[PUB_PUBMED_ID] = value

    @property
    def isbn(self):
        return self._data.get('ISBN', None)

    @isbn.setter
    def isbn(self, value: str):
        if value is not None:
            self._data['ISBN'] = value

    @property
    def issn(self):
        return self._data.get('ISSN', None)

    @issn.setter
    def issn(self, value: str):
        if value is not None:
            self._data['ISSN'] = value

    @property
    def doi(self):
        return self._data.get(PUB_DOI, None)

    @doi.setter
    def doi(self, value: str):
        if value is not None:
            self._data[PUB_DOI] = value

    @property
    def title(self):
        return self._data.get(PUB_TITLE, None)

    @title.setter
    def title(self, value: str):
        if value is not None:
            self._data[PUB_TITLE] = value
            self._data[RECORD_NAME] = value

    @property
    def authors(self):
        return self._data.get(PUB_AUTHORS, None)

    @authors.setter
    def authors(self, value: str):
        if value is not None:
            self._data[PUB_AUTHORS] = value

    @property
    def journal(self):
        return self._data.get(PUB_JOURNAL, None)

    @journal.setter
    def journal(self, value: str):
        if value is not None:
            self._data[PUB_JOURNAL] = value

    @property
    def volume(self):
        return self._data.get(PUB_VOLUME, None)

    @volume.setter
    def volume(self, value: str):
        if value is not None:
            self._data[PUB_VOLUME] = value

    @property
    def issue(self):
        return self._data.get(PUB_ISSUE, None)

    @issue.setter
    def issue(self, value: str):
        if value is not None:
            self._data[PUB_ISSUE] = value

    @property
    def first_page(self):
        return self._data.get(PUB_FIRST_PAGE, None)

    @first_page.setter
    def first_page(self, value: str):
        if value is not None:
            self._data[PUB_FIRST_PAGE] = value

    @property
    def last_page(self):
        return self._data.get(PUB_LAST_PAGE, None)

    @last_page.setter
    def last_page(self, value: str):
        if value is not None:
            self._data[PUB_LAST_PAGE] = value

    @property
    def book_title(self):
        return self._data.get(BOOK_TITLE, None)

    @book_title.setter
    def book_title(self, value: str):
        if value is not None:
            self._data[BOOK_TITLE] = value

    @property
    def editors(self):
        return self._data.get(BOOK_EDITOR, None)

    @editors.setter
    def editors(self, value: str):
        if value is not None:
            self._data[BOOK_EDITOR] = value

    @property
    def publisher(self):
        return self._data.get(BOOK_PUBLISHER, None)

    @publisher.setter
    def publisher(self, value: str):
        if value is not None:
            self._data[BOOK_PUBLISHER] = value

    @property
    def year(self) -> int:
        return self._data.get('Year', None)

    @year.setter
    def year(self, value: int):
        if value is not None:
            self._data['Year'] = value
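A minimal sketch of building a Publication by hand; the title and DOI are invented, not real references.

from mirri.entities.publication import Publication

pub = Publication()
pub.title = 'A hypothetical paper on strain preservation'
pub.doi = '10.1000/example'  # illustrative DOI
pub.year = 2021
print(pub.record_name)       # setting the title also fills RecordName
print(bool(pub))             # True once any field is set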
45
mirri/entities/sequence.py
Normal file
@ -0,0 +1,45 @@
from mirri.entities._private_classes import _FieldBasedClass
from mirri.settings import (
    ALLOWED_MARKER_TYPES,
    MARKER_INSDC,
    MARKER_SEQ,
    MARKER_TYPE)

from mirri import ValidationError


class GenomicSequence(_FieldBasedClass):
    _fields = [
        {"attribute": "marker_type", "label": MARKER_TYPE},
        {"attribute": "marker_id", "label": MARKER_INSDC},
        {"attribute": "marker_seq", "label": MARKER_SEQ},
    ]

    @property
    def marker_type(self):
        return self._data.get(MARKER_TYPE, None)

    @marker_type.setter
    def marker_type(self, value: str):
        if value is not None:
            # Compare against the list of acronyms: a substring test on a
            # joined string would wrongly accept partial matches.
            types = [m["acronym"] for m in ALLOWED_MARKER_TYPES]
            if value not in types:
                msg = f"{value} not in allowed marker types: {' '.join(types)}"
                raise ValidationError(msg)
            self._data[MARKER_TYPE] = value

    @property
    def marker_id(self) -> str:
        return self._data.get(MARKER_INSDC, None)

    @marker_id.setter
    def marker_id(self, value: str):
        self._data[MARKER_INSDC] = value

    @property
    def marker_seq(self) -> str:
        return self._data.get(MARKER_SEQ, None)

    @marker_seq.setter
    def marker_seq(self, value: str):
        self._data[MARKER_SEQ] = value
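
A quick sketch of how GenomicSequence is meant to be used (hedged: 'ITS' is one of the acronyms in ALLOWED_MARKER_TYPES; the accession and sequence values are illustrative):

seq = GenomicSequence()
seq.marker_type = 'ITS'        # validated against ALLOWED_MARKER_TYPES
seq.marker_id = 'AB123456'     # INSDC accession, illustrative
seq.marker_seq = 'ACGT'        # illustrative sequence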
1243
mirri/entities/strain.py
Normal file
File diff suppressed because it is too large
0
mirri/io/__init__.py
Normal file
0
mirri/io/parsers/__init__.py
Normal file
79
mirri/io/parsers/excel.py
Normal file
@@ -0,0 +1,79 @@
from io import BytesIO
from openpyxl import load_workbook


def excel_dict_reader(fhand, sheet_name, mandatory_column_name=None):
    fhand.seek(0)
    wb = load_workbook(filename=BytesIO(fhand.read()), data_only=True,
                       read_only=True)
    return workbook_sheet_reader(wb, sheet_name,
                                 mandatory_column_name=mandatory_column_name)


def is_none(value):
    return value is None


def workbook_sheet_reader(workbook, sheet_name, mandatory_column_name=None,
                          allowed_empty_line_slots=5):
    try:
        sheet = workbook[sheet_name]
    except KeyError as error:
        raise ValueError(f"The '{sheet_name}' sheet is missing.") from error

    first = True
    header = []
    empty_lines = 0
    for row in sheet.rows:
        values = []
        for cell in row:
            if cell.value is not None and cell.data_type == 's':
                value = str(cell.value).strip()
            else:
                value = cell.value
            values.append(value)
        if first:
            header = values
            first = False
            continue
        if not any(values):
            empty_lines += 1
            if empty_lines >= allowed_empty_line_slots:
                break
            continue
        empty_lines = 0

        data = dict(zip(header, values))
        if mandatory_column_name is not None and not data[mandatory_column_name]:
            # Skip rows whose mandatory column is empty; these are usually
            # trailing, partially formatted lines left in the sheet.
            continue
        yield data


def get_all_cell_data_from_sheet(workbook, sheet_name, allowed_empty_line_slots=5):
    try:
        sheet = workbook[sheet_name]
    except KeyError as error:
        raise ValueError(f"The '{sheet_name}' sheet is missing.") from error

    empty_lines = 0
    all_values = []
    for row in sheet.rows:
        values = []
        for cell in row:
            if cell.value is not None and cell.data_type == 's':
                value = str(cell.value).strip()
            else:
                value = cell.value
            values.append(value)
        if not any(values):
            empty_lines += 1
            if empty_lines >= allowed_empty_line_slots:
                break
            continue
        empty_lines = 0
        all_values.extend(values)
    return all_values
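
A usage sketch for the reader above (hedged: the file name is illustrative; the sheet and column names follow the MIRRI template conventions used in the parsers below):

with open('mirri_template.xlsx', 'rb') as fhand:
    for row in excel_dict_reader(fhand, 'Strains',
                                 mandatory_column_name='Accession number'):
        print(row['Accession number'])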
276
mirri/io/parsers/mirri_excel.py
Normal file
@@ -0,0 +1,276 @@
import re
from datetime import date
from io import BytesIO

import pycountry
from openpyxl import load_workbook

from mirri import rsetattr, ValidationError
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.growth_medium import GrowthMedium
from mirri.io.parsers.excel import workbook_sheet_reader
from mirri.entities.publication import Publication
from mirri.entities.date_range import DateRange
from mirri.entities.strain import OrganismType, StrainId, add_taxon_to_strain
from mirri.settings import (COMMERCIAL_USE_WITH_AGREEMENT, GENOMIC_INFO,
                            GROWTH_MEDIA, LITERATURE_SHEET, LOCATIONS,
                            MIRRI_FIELDS, NAGOYA_DOCS_AVAILABLE, NAGOYA_NO_RESTRICTIONS,
                            NAGOYA_PROBABLY_SCOPE, NO_RESTRICTION,
                            ONLY_RESEARCH, ONTOBIOTOPE,
                            PUBLICATION_FIELDS, STRAINS, SUBTAXAS)
from mirri.utils import get_country_from_name

RESTRICTION_USE_TRANSLATOR = {
    1: NO_RESTRICTION,
    2: ONLY_RESEARCH,
    3: COMMERCIAL_USE_WITH_AGREEMENT,
}
NAGOYA_TRANSLATOR = {
    1: NAGOYA_NO_RESTRICTIONS,
    2: NAGOYA_DOCS_AVAILABLE,
    3: NAGOYA_PROBABLY_SCOPE,
}
TRUEFALSE_TRANSLATOR = {
    1: False,
    2: True
}


def parse_mirri_excel(fhand, version="20200601"):
    if version == "20200601":
        return _parse_mirri_v20200601(fhand)
    else:
        raise NotImplementedError("Only version 20200601 is implemented")


def _parse_mirri_v20200601(fhand):
    fhand.seek(0)
    file_content = BytesIO(fhand.read())
    wb = load_workbook(filename=file_content, read_only=True, data_only=True)

    locations = workbook_sheet_reader(wb, LOCATIONS)
    ontobiotopes = workbook_sheet_reader(wb, ONTOBIOTOPE)

    growth_media = list(parse_growth_media(wb))

    markers = workbook_sheet_reader(wb, GENOMIC_INFO)

    publications = list(parse_publications(wb))

    strains = parse_strains(wb, locations=locations, growth_media=growth_media,
                            markers=markers, publications=publications,
                            ontobiotopes=ontobiotopes)

    return {"strains": strains, "growth_media": growth_media}
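
A usage sketch for the parser entry point (hedged: the file name is illustrative; note that 'strains' is a lazy generator, consumed here just for illustration):

with open('mirri_template.xlsx', 'rb') as fhand:
    parsed = parse_mirri_excel(fhand, version="20200601")
    for strain in parsed['strains']:
        print(strain.id.strain_id)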
def index_list_by(list_, id_):
    return {str(item[id_]): item for item in list_}


def index_list_by_attr(list_, id_):
    return {str(getattr(item, id_)): item for item in list_}


def index_markers(markers):
    indexed_markers = {}
    for marker in markers:
        strain_id = marker["Strain AN"]
        if strain_id not in indexed_markers:
            indexed_markers[strain_id] = []
        indexed_markers[strain_id].append(marker)
    return indexed_markers


def remove_hard_lines(string=None):
    if string is not None and string != '':
        return re.sub(r'\r+\n+|\t+', '', string).strip()
    else:
        return None


def parse_growth_media(wb):
    for row in workbook_sheet_reader(wb, GROWTH_MEDIA):
        gm = GrowthMedium()
        gm.acronym = str(row['Acronym'])
        gm.description = row['Description']
        gm.full_description = remove_hard_lines(row.get('Full description', None))

        yield gm


def parse_publications(wb):
    for row in workbook_sheet_reader(wb, LITERATURE_SHEET):
        pub = Publication()
        for pub_field in PUBLICATION_FIELDS:
            label = pub_field["label"]
            col_val = row.get(label, None)

            if col_val:
                attribute = pub_field["attribute"]
                setattr(pub, attribute, col_val)
        yield pub
def parse_strains(wb, locations, growth_media, markers, publications,
                  ontobiotopes):

    ontobiotopes_by_id = {str(ont["ID"]): ont['Name'] for ont in ontobiotopes}
    ontobiotopes_by_name = {v: k for k, v in ontobiotopes_by_id.items()}

    locations = index_list_by(locations, 'Locality')
    growth_media = index_list_by_attr(growth_media, 'acronym')
    publications = index_list_by_attr(publications, 'id')
    markers = index_markers(markers)

    for strain_row in workbook_sheet_reader(wb, STRAINS, "Accession number"):
        strain = StrainMirri()
        strain_id = None
        label = None
        for field in MIRRI_FIELDS:
            label = field["label"]
            attribute = field["attribute"]
            value = strain_row[label]
            if value is None or value == '':
                continue

            if attribute == "id":
                # Keep the raw accession number so later error messages can
                # reference it.
                strain_id = value
                collection, number = value.split(" ", 1)
                value = StrainId(collection=collection, number=number)
                rsetattr(strain, attribute, value)

            elif attribute == "restriction_on_use":
                rsetattr(strain, attribute, RESTRICTION_USE_TRANSLATOR[value])
            elif attribute == "nagoya_protocol":
                rsetattr(strain, attribute, NAGOYA_TRANSLATOR[value])
            elif attribute == "other_numbers":
                other_numbers = []
                for on in value.split(";"):
                    on = on.strip()
                    try:
                        collection, number = on.split(" ", 1)
                    except ValueError:
                        collection = None
                        number = on
                    _id = StrainId(collection=collection, number=number)
                    other_numbers.append(_id)
                rsetattr(strain, attribute, other_numbers)
            elif attribute == "taxonomy.taxon_name":
                try:
                    add_taxon_to_strain(strain, value)
                except ValueError:
                    msg = f"The '{label}' for strain with Accession Number {strain_id} is not according to the specification."
                    raise ValidationError(msg)
            elif attribute == "taxonomy.organism_type":
                value = [OrganismType(val.strip())
                         for val in str(value).split(";")]
                rsetattr(strain, attribute, value)
            elif attribute in ("deposit.date", "collect.date", "isolation.date",
                               "catalog_inclusion_date"):
                if isinstance(value, date):
                    value = DateRange(
                        year=value.year, month=value.month, day=value.day
                    )
                elif isinstance(value, str):
                    value = DateRange().strpdate(value)
                else:
                    raise NotImplementedError()
                rsetattr(strain, attribute, value)
            elif attribute == 'growth.recommended_temp':
                temps = value.split(';')
                if len(temps) == 1:
                    _min, _max = float(temps[0]), float(temps[0])
                else:
                    _min, _max = float(temps[0]), float(temps[1])
                rsetattr(strain, attribute, {'min': _min, 'max': _max})
            elif attribute == "growth.recommended_media":
                sep = ";" if ";" in value else "/"
                # Use a local name to avoid clobbering the growth_media index.
                media_list = [v.strip() for v in value.split(sep)]
                rsetattr(strain, attribute, media_list)
            elif attribute == 'growth.tested_temp_range':
                if value:
                    min_, max_ = value.split(";")
                    value = {'min': float(min_), 'max': float(max_)}
                    rsetattr(strain, attribute, value)
            elif attribute == "form_of_supply":
                rsetattr(strain, attribute, value.split(";"))
            elif attribute == "collect.location.coords":
                items = value.split(";")
                strain.collect.location.latitude = float(items[0])
                strain.collect.location.longitude = float(items[1])
                if len(items) > 2:
                    strain.collect.location.coord_uncertainty = items[2]

            elif attribute == "collect.location":
                location = locations[value]
                if 'Country' in location and location['Country']:
                    if location['Country'] == 'Unknown':
                        continue
                    country_3 = _get_country_alpha3(location['Country'])
                    strain.collect.location.country = country_3
                strain.collect.location.state = location["Region"]
                strain.collect.location.municipality = location["City"]
                strain.collect.location.site = location["Locality"]
            elif attribute in ("abs_related_files", "mta_files"):
                rsetattr(strain, attribute, value.split(";"))
            elif attribute in ("is_from_registered_collection",
                               "is_subject_to_quarantine", 'taxonomy.interspecific_hybrid',
                               "is_potentially_harmful", "genetics.gmo"):
                rsetattr(strain, attribute, TRUEFALSE_TRANSLATOR[value])
            elif attribute == "publications":
                value = str(value)
                pubs = []
                pub_ids = [v.strip() for v in value.split(";")]
                for pub_id in pub_ids:
                    pub = publications.get(pub_id, None)
                    if pub is None:
                        pub = Publication()
                        if '/' in pub_id:
                            pub.doi = pub_id
                        else:
                            pub.pubmed_id = pub_id
                    pubs.append(pub)
                rsetattr(strain, attribute, pubs)
            elif attribute == 'ontobiotope':
                values = []
                for val in value.split(';'):
                    if val not in ontobiotopes_by_id:
                        val = ontobiotopes_by_name[val]
                    values.append(val)
                # Store the normalized ontobiotope IDs, not the raw cell value.
                rsetattr(strain, attribute, values)
            elif attribute == 'other_denominations':
                value = [v.strip() for v in value.split(';')]
                rsetattr(strain, attribute, value)
            elif attribute == 'genetics.plasmids':
                value = [v.strip() for v in value.split(';')]
                rsetattr(strain, attribute, value)
            else:
                rsetattr(strain, attribute, value)

        # add markers
        strain_id = strain.id.strain_id
        if strain_id in markers:
            for marker in markers[strain_id]:
                _marker = GenomicSequenceBiolomics()
                _marker.marker_id = marker["INSDC AN"]
                _marker.marker_type = marker["Marker"]
                _marker.marker_seq = marker["Sequence"]
                strain.genetics.markers.append(_marker)
        yield strain
def _get_country_alpha3(loc_country):
    if loc_country == 'INW':
        return loc_country
    country = get_country_from_name(loc_country)
    if not country:
        country = pycountry.countries.get(alpha_3=loc_country)
    if not country:
        country = pycountry.historic_countries.get(alpha_3=loc_country)
    if not country:
        # Fail with a clear message instead of an AttributeError below.
        raise ValueError(f"Unknown country: {loc_country}")
    return country.alpha_3
0
mirri/io/writers/__init__.py
Normal file
305
mirri/io/writers/mirri_excel.py
Normal file
@@ -0,0 +1,305 @@
import csv
from copy import deepcopy
from openpyxl.workbook.workbook import Workbook


from mirri import rgetattr
from mirri.settings import GROWTH_MEDIA, MIRRI_FIELDS, DATA_DIR, PUBLICATION_FIELDS
from mirri.io.parsers.mirri_excel import NAGOYA_TRANSLATOR, RESTRICTION_USE_TRANSLATOR

INITIAL_SEXUAL_STATES = [
    "Mata",
    "Matalpha",
    "Mata/Matalpha",
    "Matb",
    "Mata/Matb",
    "MTLa",
    "MTLalpha",
    "MTLa/MTLalpha",
    "MAT1-1",
    "MAT1-2",
    "MAT1",
    "MAT2",
    "MT+",
    "MT-",
    "H+",
    "H-",
]
MARKER_FIELDS = [
    {"attribute": "acronym", "label": "Acronym", "mandatory": True},
    {"attribute": "marker", "label": "Marker", "mandatory": True},
]
MARKER_DATA = [
    {"acronym": "16S rRNA", "marker": "16S rRNA"},
    {"acronym": "ACT", "marker": "Actin"},
    {"acronym": "CaM", "marker": "Calmodulin"},
    {"acronym": "EF-1α", "marker": "elongation factor 1-alpha (EF-1α)"},
    {"acronym": "ITS", "marker": "nuclear ribosomal Internal Transcribed Spacer (ITS)"},
    {"acronym": "LSU", "marker": "nuclear ribosomal Large SubUnit (LSU)"},
    {"acronym": "RPB1", "marker": "Ribosomal RNA-coding genes RPB1"},
    {"acronym": "RPB2", "marker": "Ribosomal RNA-coding genes RPB2"},
    {"acronym": "TUBB", "marker": "β-Tubulin"},
]

REV_RESTRICTION_USE_TRANSLATOR = {v: k for k, v in RESTRICTION_USE_TRANSLATOR.items()}
REV_NAGOYA_TRANSLATOR = {v: k for k, v in NAGOYA_TRANSLATOR.items()}
PUB_HEADERS = [pb["label"] for pb in PUBLICATION_FIELDS]
def write_mirri_excel(path, strains, growth_media, version):
    if version == "20200601":
        _write_mirri_excel_20200601(path, strains, growth_media)
    else:
        raise NotImplementedError("Only version 20200601 is implemented")


def _write_mirri_excel_20200601(path, strains, growth_media):
    wb = Workbook()

    write_markers_sheet(wb)

    ontobiotope_path = DATA_DIR / "ontobiotopes.csv"
    write_ontobiotopes(wb, ontobiotope_path)

    write_growth_media(wb, growth_media)
    growth_media_indexes = [str(gm.acronym) for gm in growth_media]

    locations = {}
    publications = {}
    sexual_states = set(deepcopy(INITIAL_SEXUAL_STATES))
    genomic_markers = {}
    strains_data = _deserialize_strains(strains, locations, growth_media_indexes,
                                        publications, sexual_states, genomic_markers)
    strains_data = list(strains_data)

    # write the strains first so the indexed data (locations, publications,
    # sexual states and markers) gets populated
    strain_sheet = wb.create_sheet("Strains")
    strain_sheet.append([field["label"] for field in MIRRI_FIELDS])
    for strain_row in strains_data:
        strain_sheet.append(strain_row)
    redimension_cell_width(strain_sheet)

    # write locations
    loc_sheet = wb.create_sheet("Geographic origin")
    loc_sheet.append(["ID", "Country", "Region", "City", "Locality"])
    for index, loc_index in enumerate(locations.keys()):
        location = locations[loc_index]
        row = [index, location.country, location.state, location.municipality,
               loc_index]
        loc_sheet.append(row)
    redimension_cell_width(loc_sheet)

    # write publications
    pub_sheet = wb.create_sheet("Literature")
    pub_sheet.append(PUB_HEADERS)
    for publication in publications.values():
        row = []
        for pub_field in PUBLICATION_FIELDS:
            value = getattr(publication, pub_field['attribute'], None)
            row.append(value)
        pub_sheet.append(row)
    redimension_cell_width(pub_sheet)

    # write sexual states
    sex_sheet = wb.create_sheet("Sexual states")
    for sex_state in sorted(sexual_states):
        sex_sheet.append([sex_state])
    redimension_cell_width(sex_sheet)

    # write genetic markers
    markers_sheet = wb.create_sheet("Genomic information")
    markers_sheet.append(['Strain AN', 'Marker', 'INSDC AN', 'Sequence'])
    for strain_id, markers in genomic_markers.items():
        for marker in markers:
            row = [strain_id, marker.marker_type, marker.marker_id, marker.marker_seq]
            markers_sheet.append(row)
    redimension_cell_width(markers_sheet)

    del wb["Sheet"]
    wb.save(str(path))
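
A usage sketch for the writer (hedged: strains and growth media as produced by parse_mirri_excel above; the file names are illustrative):

with open('input.xlsx', 'rb') as fhand:
    parsed = parse_mirri_excel(fhand, version="20200601")
write_mirri_excel('output.xlsx', parsed['strains'], parsed['growth_media'],
                  version="20200601")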
def _deserialize_strains(strains, locations, growth_media_indexes,
                         publications, sexual_states, genomic_markers):
    for strain in strains:
        strain_row = []
        for field in MIRRI_FIELDS:
            attribute = field["attribute"]

            if attribute == "id":
                value = strain.id.strain_id
            elif attribute == "restriction_on_use":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = REV_RESTRICTION_USE_TRANSLATOR[value]
            elif attribute == "nagoya_protocol":
                value = rgetattr(strain, attribute)
                if value:
                    value = REV_NAGOYA_TRANSLATOR[value]
            elif attribute == "other_numbers":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = [f"{on.collection} {on.number}" for on in value]
                    value = "; ".join(value)
            elif attribute == 'other_denominations':
                od = strain.other_denominations
                value = "; ".join(od) if od else None
            elif attribute in (
                "is_from_registered_collection",
                "is_subject_to_quarantine",
                "is_potentially_harmful",
                "genetics.gmo",
                "taxonomy.interspecific_hybrid"
            ):
                value = rgetattr(strain, attribute)
                if value is True:
                    value = 2
                elif value is False:
                    value = 1
                else:
                    value = None
            elif attribute == "taxonomy.taxon_name":
                value = strain.taxonomy.long_name
            elif attribute in ("deposit.date", "collect.date", "isolation.date",
                               'catalog_inclusion_date'):
                value = rgetattr(strain, attribute)
                value = value.strfdate if value else None
            elif attribute == "growth.recommended_media":
                value = rgetattr(strain, attribute)
                if value is not None:
                    for gm in value:
                        if str(gm) not in growth_media_indexes:
                            msg = f"Growth media {gm} not in the provided ones"
                            raise ValueError(msg)
                    value = "/".join(value)
            elif attribute in ('growth.tested_temp_range',
                               "growth.recommended_temp"):
                value = rgetattr(strain, attribute)
                if value:
                    value = f'{value["min"]}; {value["max"]}'
            elif attribute == "form_of_supply":
                value = rgetattr(strain, attribute)
                value = ";".join(value) if value else None
            elif attribute == "collect.location.coords":
                lat = strain.collect.location.latitude
                long = strain.collect.location.longitude
                if lat is not None and long is not None:
                    value = f"{lat};{long}"
                else:
                    value = None

            elif attribute == "collect.location":
                location = strain.collect.location
                loc_index = _build_location_index(location)
                # Keep the row aligned with the MIRRI_FIELDS header even when
                # there is no location for this strain.
                if loc_index is None:
                    value = None
                else:
                    if loc_index not in locations:
                        locations[loc_index] = location
                    value = loc_index
            elif attribute in ("abs_related_files", "mta_files"):
                value = rgetattr(strain, attribute)
                value = ";".join(value) if value else None
            elif attribute == "taxonomy.organism_type":
                value = rgetattr(strain, attribute)
                if value:
                    value = "; ".join([str(v.code) for v in value])

            elif attribute == "history":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = " < ".join(value)
            elif attribute == "genetics.sexual_state":
                value = rgetattr(strain, attribute)
                if value:
                    sexual_states.add(value)
            elif attribute == "genetics.ploidy":
                value = rgetattr(strain, attribute)
            elif attribute == 'publications':
                value = []
                for pub in strain.publications:
                    value.append(pub.id)
                    if pub.id not in publications:
                        publications[pub.id] = pub
                value = ';'.join(str(v) for v in value) if value else None
            elif attribute == 'genetics.plasmids':
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = ';'.join(value)
            else:
                value = rgetattr(strain, attribute)

            strain_row.append(value)
        genomic_markers[strain.id.strain_id] = strain.genetics.markers
        yield strain_row
def _build_location_index(location):
    index = []
    if location.country:
        index.append(location.country)
    if location.site:
        index.append(location.site)
    return ';'.join(index) if index else None
def write_markers_sheet(wb):
    sheet = wb.create_sheet("Markers")
    _write_work_sheet(
        sheet,
        labels=[f["label"] for f in MARKER_FIELDS],
        attributes=[f["attribute"] for f in MARKER_FIELDS],
        data=MARKER_DATA,
    )
    redimension_cell_width(sheet)


def write_ontobiotopes(workbook, ontobiotype_path):
    ws = workbook.create_sheet("Ontobiotope")
    with ontobiotype_path.open() as fhand:
        for row in csv.reader(fhand, delimiter="\t"):
            ws.append(row)
    redimension_cell_width(ws)


def _write_work_sheet(sheet, labels, attributes, data):
    sheet.append(labels)
    for row in data:
        row_data = [row[field] for field in attributes]
        sheet.append(row_data)

    redimension_cell_width(sheet)


def write_growth_media(wb, growth_media):
    ws = wb.create_sheet(GROWTH_MEDIA)
    ws.append(["Acronym", "Description", "Full description"])
    for growth_medium in growth_media:
        row = [
            growth_medium.acronym,
            growth_medium.description,
            growth_medium.full_description,
        ]
        ws.append(row)
    redimension_cell_width(ws)


def redimension_cell_width(ws):
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                max_ = max((dims.get(cell.column_letter, 0), len(str(cell.value))))
                dims[cell.column_letter] = max_
    for col, value in dims.items():
        ws.column_dimensions[col].width = value
296
mirri/settings.py
Normal file
@@ -0,0 +1,296 @@
from pathlib import Path

DATA_DIR = Path(__file__).parent / "data"

ACCESSION_NUMBER = "accession_number"
RESTRICTION_ON_USE = "restriction_on_use"
NAGOYA_PROTOCOL = "nagoya_protocol"
ABS_RELATED_FILES = "abs_related_files"
MTA_FILES = "mta_file"
OTHER_CULTURE_NUMBERS = "other_culture_collection_numbers"
STRAIN_FROM_REGISTERED_COLLECTION = "strain_from_a_registered_collection"
RISK_GROUP = "risk_group"
DUAL_USE = "dual_use"
QUARANTINE = "quarantine"
ORGANISM_TYPE = "organism_type"
TAXON_NAME = "taxon_name"
INFRASUBSPECIFIC_NAME = "infrasubspecific_names"
COMMENTS_ON_TAXONOMY = "comments_on_taxonomy"
STATUS = "status"
HISTORY_OF_DEPOSIT = "history_of_deposit"
DEPOSITOR = "depositor"
DATE_OF_DEPOSIT = "date_of_deposit"
COLLECTED_BY = "collected_by"
DATE_OF_COLLECTION = "date_of_collection"
ISOLATED_BY = "isolated_by"
DATE_OF_ISOLATION = "date_of_isolation"
DATE_OF_INCLUSION = "date_of_inclusion_on_catalog"
TESTED_TEMPERATURE_GROWTH_RANGE = "tested_temperature_growth_range"
RECOMMENDED_GROWTH_TEMP = "recommended_growth_temperature"
RECOMMENDED_GROWTH_MEDIUM = "recommended_media_for_growth"
FORM_OF_SUPPLY = "form_of_supply"
GEO_COORDS = "coordinates_of_geographic_origin"
ACCESSION_NAME = "other_denomination"
ALTITUDE = "altitude_of_geographic_origin"
GEOGRAPHIC_ORIGIN = "geographic_origin"
GMO = "gmo"
GMO_CONSTRUCTION_INFO = "gmo_construction_information"
MUTANT_INFORMATION = "mutant_information"
GENOTYPE = "genotype"
LITERATURE = "literature"
SEXUAL_STATE = "sexual_state"
PLOIDY = "ploidy"
INTERSPECIFIC_HYBRID = "interspecific_hybrid"
HYBRIDS = 'hybrids'
PLANT_PATHOGENICITY_CODE = "plant_pathogenicity_code"
PATHOGENICITY = "pathogenicity"
ENZYME_PRODUCTION = "enzyme_production"
PRODUCTION_OF_METABOLITES = "production_of_metabolites"
APPLICATIONS = "applications"
REMARKS = "remarks"
PLASMIDS = "plasmids"
PLASMIDS_COLLECTION_FIELDS = "plasmids_collections_fields"
SUBSTRATE_HOST_OF_ISOLATION = "substrate_host_of_isolation"
ISOLATION_HABITAT = "isolation_habitat"
ONTOBIOTOPE_ISOLATION_HABITAT = "ontobiotope_term_for_the_isolation_habitat"
LITERATURE_LINKED_TO_SEQ_GENOME = "literature_linked_to_the_sequence_genome"

# StrainId
STRAIN_ID = "id"
COLLECTION_CODE = "collection_code"
STRAIN_PUI = "strain_pui"
STRAIN_URL = "strain_url"

ID_SYNONYMS = 'id_synonyms'
# Taxonomy
GENUS = "genus"
SPECIES = "species"

# Location
COUNTRY = "countryOfOriginCode"
SITE = "site"
STATE = "state"
PROVINCE = "province"
MUNICIPALITY = "municipality"
ISLAND = "island"
OTHER = "other"
LATITUDE = "latitude"
LONGITUDE = "longitude"
# NOTE: this rebinds the ALTITUDE defined above; the location key is the
# value in effect for ALLOWED_COLLECTING_SITE_KEYS below.
ALTITUDE = "altitude"
GEOREF_METHOD = "georeferencingMethod"
COORDUNCERTAINTY = "coordUncertainty"
COORD_SPATIAL_REFERENCE = "coordenatesSpatialReference"
LOCATION = "location"

ALLOWED_COLLECTING_SITE_KEYS = [
    COUNTRY,
    STATE,
    PROVINCE,
    ISLAND,
    MUNICIPALITY,
    OTHER,
    SITE,
    LATITUDE,
    LONGITUDE,
    ALTITUDE,
    GEOREF_METHOD,
    COORDUNCERTAINTY,
    COORD_SPATIAL_REFERENCE,
]

MIRRI_FIELDS = [
    {"attribute": "id", "label": "Accession number"},
    {"attribute": "restriction_on_use", "label": "Restrictions on use"},
    {"attribute": "nagoya_protocol",
     "label": "Nagoya protocol restrictions and compliance conditions"},
    {"attribute": ABS_RELATED_FILES, "label": "ABS related files"},
    {"attribute": "mta_files", "label": "MTA file"},
    {"attribute": "other_numbers", "label": "Other culture collection numbers"},
    {"attribute": "is_from_registered_collection",
     "label": "Strain from a registered collection"},
    {"attribute": "risk_group", "label": "Risk Group"},
    {"attribute": "is_potentially_harmful", "label": "Dual use"},
    {"attribute": "is_subject_to_quarantine", "label": "Quarantine in Europe"},
    {"attribute": "taxonomy.organism_type", "label": "Organism type"},
    {"attribute": "taxonomy.taxon_name", "label": "Taxon name"},
    {"attribute": "taxonomy.infrasubspecific_name",
     "label": "Infrasubspecific names"},
    {"attribute": "taxonomy.comments", "label": "Comment on taxonomy"},
    {"attribute": "taxonomy.interspecific_hybrid",
     "label": "Interspecific hybrid"},
    {"attribute": "status", "label": "Status"},
    {"attribute": "history", "label": "History of deposit"},
    {"attribute": "deposit.who", "label": "Depositor"},
    {"attribute": "deposit.date", "label": "Date of deposit"},
    {"attribute": "catalog_inclusion_date",
     "label": "Date of inclusion in the catalogue"},
    {"attribute": "collect.who", "label": "Collected by"},
    {"attribute": "collect.date", "label": "Date of collection"},
    {"attribute": "isolation.who", "label": "Isolated by"},
    {"attribute": "isolation.date", "label": "Date of isolation"},
    {"attribute": "isolation.substrate_host_of_isolation",
     "label": "Substrate/host of isolation"},
    {"attribute": "growth.tested_temp_range",
     "label": "Tested temperature growth range"},
    {"attribute": "growth.recommended_temp",
     "label": "Recommended growth temperature"},
    {"attribute": "growth.recommended_media",
     "label": "Recommended medium for growth"},
    {"attribute": "form_of_supply", "label": "Form of supply"},
    {"attribute": "other_denominations", "label": "Other denomination"},
    {"attribute": "collect.location.coords",
     "label": "Coordinates of geographic origin"},
    {"attribute": "collect.location.altitude",
     "label": "Altitude of geographic origin"},
    {"attribute": "collect.location", "label": "Geographic origin"},
    {"attribute": "collect.habitat", "label": "Isolation habitat"},
    {"attribute": "collect.habitat_ontobiotope",
     "label": "Ontobiotope term for the isolation habitat"},
    {"attribute": "genetics.gmo", "label": "GMO"},
    {"attribute": "genetics.gmo_construction",
     "label": "GMO construction information"},
    {"attribute": "genetics.mutant_info", "label": "Mutant information"},
    {"attribute": "genetics.genotype", "label": "Genotype"},
    {"attribute": "genetics.sexual_state", "label": "Sexual state"},
    {"attribute": "genetics.ploidy", "label": "Ploidy"},
    {"attribute": "genetics.plasmids", "label": "Plasmids"},
    {"attribute": "genetics.plasmids_in_collections",
     "label": "Plasmids collections fields"},
    {"attribute": "publications", "label": "Literature"},
    {"attribute": PLANT_PATHOGENICITY_CODE, "label": "Plant pathogenicity code"},
    {"attribute": "pathogenicity", "label": "Pathogenicity"},
    {"attribute": "enzyme_production", "label": "Enzyme production"},
    {"attribute": "production_of_metabolites",
     "label": "Production of metabolites"},
    {"attribute": "applications", "label": "Applications"},
    {"attribute": "remarks", "label": "Remarks"},
    {"attribute": LITERATURE_LINKED_TO_SEQ_GENOME,
     "label": "Literature linked to the sequence/genome"},
]

ALLOWED_SUBTAXA = ["subspecies", "variety", "convarietas", "group", "forma",
                   'forma.specialis']
ALLOWED_TAXONOMIC_RANKS = ["family", "genus", "species"] + ALLOWED_SUBTAXA

# nagoya
NAGOYA_NO_RESTRICTIONS = "no_known_restrictions_under_the_Nagoya_protocol"
NAGOYA_DOCS_AVAILABLE = "documents_providing_proof_of_legal_access_and_terms_of_use_available_at_the_collection"
NAGOYA_PROBABLY_SCOPE = "strain_probably_in_scope,_please_contact_the_culture_collection"

ALLOWED_NAGOYA_OPTIONS = [NAGOYA_NO_RESTRICTIONS,
                          NAGOYA_DOCS_AVAILABLE, NAGOYA_PROBABLY_SCOPE]

# Use restriction
NO_RESTRICTION = "no_restriction"
ONLY_RESEARCH = "only_research"
COMMERCIAL_USE_WITH_AGREEMENT = "commercial_use_with_agreement"

ALLOWED_RESTRICTION_USE_OPTIONS = [
    NO_RESTRICTION,
    ONLY_RESEARCH,
    COMMERCIAL_USE_WITH_AGREEMENT,
]

ALLOWED_RISK_GROUPS = ["1", "2", "3", "4"]

AGAR = "Agar"
CRYO = "Cryo"
DRY_ICE = "Dry Ice"
LIQUID_CULTURE_MEDIUM = "Liquid Culture Medium"
LYO = "Lyo"
OIL = "Oil"
WATER = "Water"
ALLOWED_FORMS_OF_SUPPLY = [AGAR, CRYO, DRY_ICE,
                           LIQUID_CULTURE_MEDIUM, LYO, OIL, WATER]

DEPOSIT = "deposit"
ISOLATION = "isolation"
COLLECT = "collect"
GROWTH = "growth"
GENETICS = "genetics"
TAXONOMY = "taxonomy"
# Markers
MARKERS = "markers"
MARKER_TYPE = "marker_type"
MARKER_INSDC = "INSDC"
MARKER_SEQ = "marker_seq"
ALLOWED_MARKER_TYPES = [
    {"acronym": "16S rRNA", "marker": "16S rRNA"},
    {"acronym": "ACT", "marker": "Actin"},
    {"acronym": "CaM", "marker": "Calmodulin"},
    {"acronym": "EF-1α", "marker": "elongation factor 1-alpha (EF-1α)"},
    {"acronym": "ITS",
     "marker": "nuclear ribosomal Internal Transcribed Spacer (ITS)"},
    {"acronym": "LSU", "marker": "nuclear ribosomal Large SubUnit (LSU)"},
    {"acronym": "RPB1", "marker": "Ribosomal RNA-coding genes RPB1"},
    {"acronym": "RPB2", "marker": "Ribosomal RNA-coding genes RPB2"},
    {"acronym": "TUBB", "marker": "β-Tubulin"},
]

PUBLICATIONS = "publications"
PUB_ID = "id"
PUB_DOI = "pub_doi"
PUB_PUBMED_ID = "pubmed_id"
PUB_FULL_REFERENCE = "full_reference"
PUB_TITLE = "title"
PUB_AUTHORS = "authors"
PUB_JOURNAL = "journal"
PUB_YEAR = "year"
PUB_VOLUME = "volume"
PUB_ISSUE = "issue"
PUB_FIRST_PAGE = "first_page"
PUB_LAST_PAGE = "last_page"
BOOK_TITLE = "book_title"
BOOK_EDITOR = "book_editor"
BOOK_PUBLISHER = "book_publisher"


PUBLICATION_FIELDS = [
    {"label": "ID", "attribute": PUB_ID},
    {"label": "Full reference", "attribute": PUB_FULL_REFERENCE},
    {"label": "Authors", "attribute": PUB_AUTHORS},
    {"label": "Title", "attribute": PUB_TITLE},
    {"label": "Journal", "attribute": PUB_JOURNAL},
    {"label": "Year", "attribute": PUB_YEAR},
    {"label": "Volume", "attribute": PUB_VOLUME},
    {"label": "Issue", "attribute": PUB_ISSUE},
    {"label": "First page", "attribute": PUB_FIRST_PAGE},
    {"label": "Last page", "attribute": PUB_LAST_PAGE},
    {"label": "Book title", "attribute": BOOK_TITLE},
    {"label": "Editors", "attribute": BOOK_EDITOR},
    {"label": "Publisher", "attribute": BOOK_PUBLISHER},
]


# ploidy
ANEUPLOID = 0
HAPLOID = 1
DIPLOID = 2
TRIPLOID = 3
TETRAPLOID = 4
POLYPLOID = 9

ALLOWED_PLOIDIES = [ANEUPLOID, HAPLOID, DIPLOID, TRIPLOID, TETRAPLOID,
                    POLYPLOID]

SUBTAXAS = {
    "subsp.": "subspecies",
    "var.": "variety",
    "convar.": "convarietas",
    "group.": "group",
    "f.": "forma",
    "f.sp.": "forma.specialis"
}

# Excel sheet names
LOCATIONS = "Geographic origin"  # 'Locations'
GROWTH_MEDIA = "Growth media"
GENOMIC_INFO = "Genomic information"
STRAINS = "Strains"
LITERATURE_SHEET = "Literature"
SEXUAL_STATE_SHEET = "Sexual states"
RESOURCE_TYPES_VALUES = "Resource types values"
FORM_OF_SUPPLY_SHEET = "Forms of supply"
PLOIDY_SHEET = "Ploidy"
ONTOBIOTOPE = "Ontobiotope"
# NOTE: this rebinds the lower-case MARKERS constant above with the sheet name.
MARKERS = "Markers"
48
mirri/utils.py
Normal file
@@ -0,0 +1,48 @@
import pycountry


class FakeCountry:
    def __init__(self, name=None, code3=None):
        self.code3 = code3
        self.name = name


def get_pycountry(value):
    if value == 'INW':
        return FakeCountry(name='International Water', code3='INW')

    country = get_country_from_name(value)
    if country is None:
        country = get_country_from_alpha3(value)
    return country


def get_country_from_name(name):
    country = pycountry.countries.get(name=name)
    try:
        if country is None:
            country = pycountry.countries.get(common_name=name)
        if country is None:
            country = pycountry.countries.get(official_name=name)
        if country is None:
            country = pycountry.historic_countries.get(name=name)
        if country is None:
            country = pycountry.historic_countries.get(common_name=name)
        if country is None:
            country = pycountry.historic_countries.get(official_name=name)
    except (AttributeError, KeyError):
        country = None

    return country


def get_country_from_alpha3(code):
    country = pycountry.countries.get(alpha_3=code)
    try:
        if country is None:
            country = pycountry.historic_countries.get(alpha_3=code)
    except (AttributeError, KeyError):
        country = None

    return country
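
A usage sketch for the country helpers (hedged: the exact return value depends on the installed pycountry data):

country = get_country_from_name('Portugal')
print(country.alpha_3 if country else 'unknown')   # expected: PRT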
0
mirri/validation/__init__.py
Normal file
50
mirri/validation/entity_validators.py
Normal file
@@ -0,0 +1,50 @@
from mirri import rgetattr


def validate_strain(strain, version='20200601'):
    if version == '20200601':
        return _validate_strain_v20200601(strain)
    raise NotImplementedError('Only v20200601 is implemented')


def _validate_strain_v20200601(strain):
    mandatory_attrs = [{'label': 'Accession Number', 'attr': 'id.strain_id'},
                       {'label': 'Nagoya protocol', 'attr': 'nagoya_protocol'},
                       {'label': 'Restriction on use', 'attr': 'restriction_on_use'},
                       {'label': 'Risk group', 'attr': 'risk_group'},
                       {'label': 'Organism type', 'attr': 'taxonomy.organism_type'},
                       {'label': 'Taxon name', 'attr': 'taxonomy.long_name'},
                       {'label': 'Recommended growth temperature', 'attr': 'growth.recommended_temp'},
                       {'label': 'Recommended media', 'attr': 'growth.recommended_media'},
                       {'label': 'Form of supply', 'attr': 'form_of_supply'},
                       {'label': 'Country', 'attr': 'collect.location.country'}]

    errors = []

    for mandatory in mandatory_attrs:
        value = rgetattr(strain, mandatory['attr'])
        if value is None:
            errors.append(f"{mandatory['label']} is a mandatory field")

    if not is_valid_nagoya(strain):
        errors.append('Not compliant with Nagoya protocol requirements')

    return errors


def is_valid_nagoya(strain):
    # Nagoya requirement: strains whose earliest known date (collection,
    # isolation, deposit or catalog inclusion) is 2014 or later must carry a
    # country of origin.
    _date = strain.collect.date
    if _date is None:
        _date = strain.isolation.date
    if _date is None:
        _date = strain.deposit.date
    if _date is None:
        _date = strain.catalog_inclusion_date
    year = None if _date is None else _date._year

    if year is not None and year >= 2014 and strain.collect.location.country is None:
        return False

    return True
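
A usage sketch for the validator (hedged: 'strain' stands for a parsed strain entity such as those yielded by parse_strains above):

for error in validate_strain(strain, version='20200601'):
    print(error)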
3
mirri/validation/error_logging/__init__.py
Normal file
@@ -0,0 +1,3 @@
from .error import Entity, Error
from .error_message import ErrorMessage
from .error_log import ErrorLog
119
mirri/validation/error_logging/error.py
Normal file
@@ -0,0 +1,119 @@
from typing import Optional
from .error_message import ErrorMessage


class Entity():
    """Entity information

    Args:
        acronym: acronym of the entity. Must be a 3-character capitalized string.
    """

    def __init__(self, acronym: str) -> None:
        self.acronym = acronym

    def __str__(self) -> str:
        return f"Entity {self.acronym}: {self.name}"

    @property
    def _acronyms(self) -> list:
        return [
            func
            for func in dir(self)
            if func.isupper() and
            callable(getattr(self, func)) and
            not func.startswith("__")
        ]

    @property
    def _names(self) -> dict:
        return {acr: getattr(self, acr)() for acr in self._acronyms}

    @property
    def name(self) -> str:
        try:
            return self._names[self.acronym]
        except KeyError:
            raise KeyError(f'Unknown acronym {self.acronym}.')

    @property
    def acronym(self) -> str:
        return self._acronym

    @acronym.setter
    def acronym(self, acronym: str) -> None:
        self._acronym = acronym

    def EFS(self) -> str:
        return 'Excel File Structure'

    def GMD(self) -> str:
        return 'Growth Media'

    def GOD(self) -> str:
        return 'Geographic Origin'

    def LID(self) -> str:
        return 'Literature'

    def STD(self) -> str:
        return 'Strains'

    def GID(self) -> str:
        return 'Genomic Information'

    def OTD(self) -> str:
        return 'Ontobiotope'

    def UCT(self) -> str:
        return 'Uncategorized'


class Error():
    """Error information

    Args:
        code (str): Error code; its first three characters name the entity.
        pk (str, optional): The instance's primary key that triggered the error. Defaults to None.
        data (str, optional): Data used for sorting the messages. Defaults to None.
    """

    def __init__(self, code: str, pk: Optional[str] = None, data: Optional[str] = None) -> None:
        self.code = code.upper()
        self.pk = pk
        self.data = data

    def __str__(self):
        return f"Error {self.code}: {self.message}"

    @property
    def code(self) -> str:
        return self._code

    @code.setter
    def code(self, code: str) -> None:
        self._code = code.upper()

    @property
    def pk(self) -> Optional[str]:
        return self._pk

    @pk.setter
    def pk(self, pk: Optional[str] = None) -> None:
        self._pk = pk

    @property
    def data(self) -> Optional[str]:
        return self._data

    @data.setter
    def data(self, data: Optional[str]):
        self._data = data

    @property
    def entity(self) -> Entity:
        return Entity(self.code[:3])

    @property
    def message(self) -> str:
        return ErrorMessage(self.code, self.pk, self.data).message
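
A usage sketch tying Error, Entity and ErrorMessage together (hedged: 'GMD02' is one of the codes defined in ErrorMessage below):

error = Error('GMD02')
print(error.entity.name)   # 'Growth Media'
print(error.message)       # "The 'Acronym' column is empty or has missing values."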
77
mirri/validation/error_logging/error_log.py
Normal file
@@ -0,0 +1,77 @@
from typing import Optional, Union
from datetime import datetime
from .error import Error


class ErrorLog():
    def __init__(self, input_filename: str, cc: Optional[str] = None, date: Optional[Union[str, datetime]] = None, limit: int = 100):
        """
        Logger for Error instances.

        Args:
            input_filename (str): name of the file to be logged
            cc (str, optional): name of the curator. Defaults to None.
            date (str, optional): date (e.g. created, last modified) associated with the file. Useful for versioning. Defaults to None.
            limit (int, optional): limit of errors to print to the report. Defaults to 100.
        """
        self._input_filename = input_filename
        self._cc = cc
        # Go through the property setter so that string dates get parsed.
        self.date = date
        self._errors = {}
        self.limit = limit
        self._counter = 0

    def __str__(self) -> str:
        output = f"""Error Log for file {self._input_filename}\nENTITY | CODE | MESSAGE"""
        for acronym, error_list in self.get_errors().items():
            for error in error_list:
                output += f"\n{acronym:6} | {error.code:6} | {error.message[:100]}"
        return output

    @property
    def input_filename(self) -> str:
        return self._input_filename

    @input_filename.setter
    def input_filename(self, input_filename: str) -> None:
        self._input_filename = input_filename

    @property
    def cc(self) -> Optional[str]:
        return self._cc

    @cc.setter
    def cc(self, cc: Optional[str]) -> None:
        self._cc = cc

    @property
    def date(self) -> Optional[Union[str, datetime]]:
        return self._date

    @date.setter
    def date(self, date: Optional[Union[str, datetime]] = None) -> None:
        if isinstance(date, str):
            self._date = datetime.strptime(date, r'%d-%m-%Y')
        else:
            self._date = date

    def get_errors(self) -> dict:
        """
        Get all errors.

        Returns:
            dict: Error instances grouped by entity acronym.
        """
        return self._errors

    def add_error(self, error: Error) -> None:
        """
        Add an error.

        Args:
            error (Error): Error instance.
        """
        if error.entity.acronym not in self._errors:
            self._errors[error.entity.acronym] = [error]
        else:
            self._errors[error.entity.acronym].append(error)
408
mirri/validation/error_logging/error_message.py
Normal file
@@ -0,0 +1,408 @@
from typing import Optional


class ErrorMessage():
    """Error message

    Args:
        code (str): Error code.
        pk (str, optional): The instance's primary key that triggered the error. Defaults to None.
        value (str, optional): The instance's value that triggered the error. Defaults to None.
    """

    def __init__(self, code: str, pk: Optional[str] = None, value: Optional[str] = None):
        self.code = code.upper()
        self.pk = pk
        self.value = value

    @property
    def _codes(self) -> list:
        return [
            func
            for func in dir(self)
            if func.isupper() and
            callable(getattr(self, func)) and
            not func.startswith("__")
        ]

    @property
    def _messages(self) -> dict:
        return {code: getattr(self, code) for code in self._codes}

    @property
    def message(self) -> str:
        if not self._validate_code():
            raise ValueError(f"{self.code} not found")
        return self._messages[self.code]()

    @property
    def code(self) -> str:
        return self._code

    @code.setter
    def code(self, code: str) -> None:
        self._code = code.upper()

    def _validate_code(self) -> bool:
        return self.code in self._codes

    @property
    def pk(self) -> str:
        return self._pk

    @pk.setter
    def pk(self, pk: str) -> None:
        self._pk = pk

    @property
    def value(self) -> str:
        return self._value

    @value.setter
    def value(self, value: str) -> None:
        self._value = value

    """
    Excel File Structure Error Codes
    """

    def EXL00(self):
        return f"The provided file '{self.pk}' is not an excel (xlsx) file"

    def EFS01(self):
        return "The 'Growth media' sheet is missing. Please check the provided excel template."

    def EFS02(self):
        return "The 'Geographic origin' sheet is missing. Please check the provided excel template."

    def EFS03(self):
        return "The 'Literature' sheet is missing. Please check the provided excel template."

    def EFS04(self):
        return "The 'Sexual state' sheet is missing. Please check the provided excel template."

    def EFS05(self):
        return "The 'Strains' sheet is missing. Please check the provided excel template."

    def EFS06(self):
        return "The 'Ontobiotope' sheet is missing. Please check the provided excel template."

    def EFS07(self):
        return "The 'Markers' sheet is missing. Please check the provided excel template."

    def EFS08(self):
        return "The 'Genomic information' sheet is missing. Please check the provided excel template."

    """
    Growth Media Error Codes
    """

    def GMD01(self):
        return "The 'Acronym' column is a mandatory field in the Growth Media sheet."

    def GMD02(self):
        return "The 'Acronym' column is empty or has missing values."

    def GMD03(self):
        return "The 'Description' column is a mandatory field in the Growth Media sheet. The column can not be empty."

    def GMD04(self):
        return f"The 'Description' for growth media with Acronym {self.pk} is missing."

    """
    Geographic Origin Error Codes
    """

    def GOD01(self):
        return "The 'ID' column is a mandatory field in the Geographic Origin sheet."

    def GOD02(self):
        return "The 'ID' column is empty or has missing values."

    def GOD03(self):
        return "The 'Country' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."

    def GOD04(self):
        return f"The 'Country' for geographic origin with ID {self.pk} is missing."

    def GOD05(self):
        return f"The 'Country' for geographic origin with ID {self.pk} is incorrect."

    def GOD06(self):
        return "The 'Locality' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."

    def GOD07(self):
        return f"The 'Locality' for geographic origin with ID {self.pk} is missing."

    """
    Literature Error Codes
    """

    def LID01(self):
        return "The 'ID' column is a mandatory field in the Literature sheet."

    def LID02(self):
        return "The 'ID' column is empty or has missing values."

    def LID03(self):
        return "The 'Full reference' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID04(self):
        return f"The 'Full reference' for literature with ID {self.pk} is missing."

    def LID05(self):
        return "The 'Authors' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID06(self):
        return f"The 'Authors' for literature with ID {self.pk} is missing."

    def LID07(self):
        return "The 'Title' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID08(self):
        return f"The 'Title' for literature with ID {self.pk} is missing."

    def LID09(self):
        return "The 'Journal' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID10(self):
        return f"The 'Journal' for literature with ID {self.pk} is missing."

    def LID11(self):
        return "The 'Year' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID12(self):
        return f"The 'Year' for literature with ID {self.pk} is missing."

    def LID13(self):
        return "The 'Volume' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID14(self):
        return f"The 'Volume' for literature with ID {self.pk} is missing."

    def LID15(self):
        return "The 'First page' column is a mandatory field. The column can not be empty."

    def LID16(self):
        return f"The 'First page' for literature with ID {self.pk} is missing."

    def LID17(self):
        msg = 'If journal: Title, Authors, Journal, Year and First page are required. '
        msg += 'If book: Book title, Authors, Year, Editors and Publisher are required.'
        return msg

    """
    Strains Error Codes
    """

    def STD01(self):
        return "The 'Accession number' column is a mandatory field in the Strains sheet."

    def STD02(self):
        return "The 'Accession number' column is empty or has missing values."

    def STD03(self):
|
||||||
|
return f"The 'Accesion number' must be unique. The '{self.value}' is repeated."
|
||||||
|
|
||||||
|
def STD04(self):
|
||||||
|
return (f"The 'Accession number' {self.pk} is not according to the specification."
|
||||||
|
" The value must be of the format '<Sequence of characters> <sequence of characters>'.")
|
||||||
|
|
||||||
|
def STD05(self):
|
||||||
|
return f"The 'Restriction on use' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD06(self):
|
||||||
|
return f"The 'Restriction on use' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD07(self):
|
||||||
|
return (f"The 'Restriction on use' for strain with Accession Number {self.pk} is not according to the specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
|
def STD08(self):
|
||||||
|
return f"The 'Nagoya protocol restrictions and compliance conditions' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD09(self):
|
||||||
|
return f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD10(self):
|
||||||
|
return (f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is not according to the specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
|
def STD11(self):
|
||||||
|
return (f"The 'Strain from a registered collection' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
|
def STD12(self):
|
||||||
|
return "The 'Risk group' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD13(self):
|
||||||
|
return f"The 'Risk group' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD14(self):
|
||||||
|
return (f"The 'Risk group' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2, 3, 4.")
|
||||||
|
|
||||||
|
def STD15(self):
|
||||||
|
return (f"The 'Dual use' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
|
def STD16(self):
|
||||||
|
return (f"The “Quarantine in europe” for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
|
def STD17(self):
|
||||||
|
return f"The 'Organism type' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD18(self):
|
||||||
|
return f"The 'Organism type' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD19(self):
|
||||||
|
return (f"The 'Organism type' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 'Algae', 'Archaea', 'Bacteria', 'Cyanobacteria', "
|
||||||
|
"'Filamentous Fungi', 'Phage', 'Plasmid', 'Virus', 'Yeast', 1, 2, 3, 4, 5, 6, 7, 8, 9.")
|
||||||
|
|
||||||
|
def STD20(self):
|
||||||
|
return f"The 'Taxon name' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD21(self):
|
||||||
|
return f"The 'Taxon name' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD22(self):
|
||||||
|
return f"The 'Taxon name' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
|
||||||
|
def STD23(self):
|
||||||
|
return (f"The 'Interspecific hybrid' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
|
def STD24(self):
|
||||||
|
return f"The 'History of deposit' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
|
||||||
|
def STD25(self):
|
||||||
|
return (f"The 'Date of deposit' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
|
def STD26(self):
|
||||||
|
return (f"The 'Date of inclusion in the catalogue' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
|
def STD27(self):
|
||||||
|
return (f"The 'Date of collection' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
|
def STD28(self):
|
||||||
|
return (f"The 'Date of isolation' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
|
def STD29(self):
|
||||||
|
return (f"The 'Tested temperature growth range' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" It must have two decimal numbers separated by ','")
|
||||||
|
|
||||||
|
def STD30(self):
|
||||||
|
return f"The 'Recommended growth temperature' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD31(self):
|
||||||
|
return f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD32(self):
|
||||||
|
return (f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" It must have two decimal numbers separated by ','.")
|
||||||
|
|
||||||
|
def STD33(self):
|
||||||
|
return f"The 'Recommended medium for growth' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD34(self):
|
||||||
|
return f"The 'Recommended medium for growth' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD35(self):
|
||||||
|
return f"The value of 'Recommended medium for growth' for strain with Accession Number {self.pk} is not in the Growth Media Sheet."
|
||||||
|
|
||||||
|
def STD36(self):
|
||||||
|
return f"The 'Forms of supply' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD37(self):
|
||||||
|
return f"The 'Forms of supply' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD38(self):
|
||||||
|
return f"The value of 'Forms of supply' for strain with Accession Number {self.pk} is not in the Forms of Supply Sheet."
|
||||||
|
|
||||||
|
def STD39(self):
|
||||||
|
return (f"The 'Coordinates of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
"The allowed formats are two or three decimal numbers separated by ','. Moreover, the first number must be"
|
||||||
|
"between [-90, 90], the second between [-180, 180], and the third, if provided, can assume any value.")
|
||||||
|
|
||||||
|
def STD40(self):
|
||||||
|
return (f"The 'Altitude of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
"The allowed formats are one decimal number between [-200, 8000].")
|
||||||
|
|
||||||
|
def STD41(self):
|
||||||
|
return f"The value of 'Ontobiotope term for the isolation habitat' for strain with Accession Number {self.pk} is not in the Ontobiotope Sheet."
|
||||||
|
|
||||||
|
def STD42(self):
|
||||||
|
return (f"The 'GMO' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2")
|
||||||
|
|
||||||
|
def STD43(self):
|
||||||
|
return (f"The 'Sexual State' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 'Mata', 'Matalpha', 'Mata/Matalpha', "
|
||||||
|
"'Matb', 'Mata/Matb', 'MTLa', 'MTLalpha', 'MTLa/MTLalpha', 'MAT1-1', 'MAT1-2', 'MAT1', 'MAT2', 'MT+', 'MT-'")
|
||||||
|
|
||||||
|
def STD44(self):
|
||||||
|
return (f"The 'Ploidy' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 0, 1, 2, 3, 4, 9")
|
||||||
|
|
||||||
|
def STD45(self):
|
||||||
|
msg = f"At least one of the values '{self.value}' of the literature field for strain {self.pk} are not in the literature sheet. "
|
||||||
|
msg += "If the those values are Pubmed ids or DOIs, please ignore this messsage"
|
||||||
|
return msg
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Genomic Information Error Codes
|
||||||
|
"""
|
||||||
|
|
||||||
|
def GID01(self):
|
||||||
|
return f"The 'Strain Acession Number' (Strain AN) column is a mandatory field in the Genomic Information Sheet."
|
||||||
|
|
||||||
|
def GID02(self):
|
||||||
|
return f"The 'Strain Acession Number' (Strain AN) column is empty or has missing values."
|
||||||
|
|
||||||
|
def GID03(self):
|
||||||
|
return f"The value of 'Strain Acession Number' (Strain AN) {self.value} is not in the Strains sheet."
|
||||||
|
|
||||||
|
def GID04(self):
|
||||||
|
return f"The 'Marker' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def GID05(self):
|
||||||
|
return f"The 'Marker' for genomic information with Strain AN {self.pk} is missing."
|
||||||
|
|
||||||
|
def GID06(self):
|
||||||
|
return f"The 'Marker' for genomic information with Strain AN {self.pk} is incorrect."
|
||||||
|
|
||||||
|
def GID07(self):
|
||||||
|
return f"The 'INSDC AN' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def GID08(self):
|
||||||
|
return f"The 'INSDC AN' for genomic information with Strain AN {self.pk} is missing."
|
||||||
|
|
||||||
|
def GID09(self):
|
||||||
|
return f"The 'INSDC AN' for genomic information with Strain AN {self.pk} is incorrect."
|
||||||
|
|
||||||
|
def GID10(self):
|
||||||
|
return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect."
|
||||||
|
" It must be a sequence of 'G', 'T', 'A', 'C' characteres of any length and without white spaces.")
|
||||||
|
|
||||||
|
"""
|
||||||
|
Ontobiotope Error Codes
|
||||||
|
"""
|
||||||
|
|
||||||
|
def OTD01(self):
|
||||||
|
return "The 'ID' columns is a mandatory field in the Ontobiotope Sheet."
|
||||||
|
|
||||||
|
def OTD02(self):
|
||||||
|
return "The 'ID' columns is empty or has missing values."
|
||||||
|
|
||||||
|
def OTD03(self):
|
||||||
|
return "The 'Name' columns is a mandatory field in the Ontobiotope Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def OTD04(self):
|
||||||
|
return f"The 'Name' for ontobiotope with ID {self.pk} is missing."
|
||||||
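
# A usage sketch of the error-code machinery above (illustrative only; it
# assumes the constructor signature Error(code, pk=None, value=None) seen in
# this class' __init__):
#     error = Error('GMD04', pk='MA2')
#     error.message  # "The 'Description' for growth media with Acronym MA2 is missing."
# The `message` property dispatches to the upper-case method named after the
# code, so adding a new error is just a matter of adding such a method.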
483
mirri/validation/excel_validator.py
Normal file
@@ -0,0 +1,483 @@
import re
from pathlib import Path
from io import BytesIO
from zipfile import BadZipfile
from datetime import datetime
from calendar import monthrange

from openpyxl import load_workbook

from mirri.io.parsers.excel import workbook_sheet_reader, get_all_cell_data_from_sheet
from mirri.validation.error_logging import ErrorLog, Error
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
                                   ERROR_CODE, FIELD, MANDATORY, MATCH,
                                   MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON,
                                   TYPE, UNIQUE, VALIDATION, VALUES, BIBLIO)
from mirri.settings import LOCATIONS, SUBTAXAS
from mirri.validation.validation_conf_20200601 import MIRRI_20200601_VALLIDATION_CONF


def validate_mirri_excel(fhand, version="20200601"):
    if version == "20200601":
        configuration = MIRRI_20200601_VALLIDATION_CONF
    else:
        raise NotImplementedError("Only version 20200601 is implemented")

    return validate_excel(fhand, configuration)


def validate_excel(fhand, configuration):
    validation_conf = configuration['sheet_schema']
    cross_ref_conf = configuration['cross_ref_conf']
    in_memory_sheet_conf = configuration['keep_sheets_in_memory']
    excel_name = Path(fhand.name).stem
    error_log = ErrorLog(excel_name)

    try:
        workbook = load_workbook(filename=BytesIO(fhand.read()),
                                 read_only=True, data_only=True)
    except (BadZipfile, IOError):
        error = Error('EXL00', fhand.name, fhand.name)
        error_log.add_error(error)
        return error_log

    # excel structure errors
    structure_errors = list(validate_excel_structure(workbook, validation_conf))
    if structure_errors:
        for error in structure_errors:
            error = Error(error[ERROR_CODE], pk=error['id'],
                          data=error['value'])
            error_log.add_error(error)

        return error_log

    crossrefs = get_all_crossrefs(workbook, cross_ref_conf)
    in_memory_sheets = get_all_in_memory_sheet(workbook, in_memory_sheet_conf)
    content_errors = validate_content(workbook, validation_conf,
                                      crossrefs, in_memory_sheets)

    for error in content_errors:
        # if error[ERROR_CODE] == 'STD43':
        #     continue
        error = Error(error[ERROR_CODE], pk=error['id'], data=error['value'])

        error_log.add_error(error)
    return error_log
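
# A minimal usage sketch for the entry point above ('strains.xlsx' is a
# hypothetical file name; ErrorLog's interface is assumed from its use in
# is_valid_file() further down, where get_errors() behaves like a mapping of
# errors grouped by type):
#     with open('strains.xlsx', 'rb') as fhand:
#         error_log = validate_mirri_excel(fhand)
#     for error_type, errors in error_log.get_errors().items():
#         ...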
def validate_excel_structure(workbook, validation_conf):
    for sheet_name, sheet_conf in validation_conf.items():
        mandatory = sheet_conf.get(VALIDATION, {}).get(TYPE, None)
        mandatory = mandatory == MANDATORY

        error_code = sheet_conf.get(VALIDATION, {}).get(ERROR_CODE, False)
        try:
            sheet = workbook[sheet_name]
        except KeyError:
            sheet = None

        if sheet is None:
            if mandatory:
                yield {'id': None, 'sheet': sheet_name, 'field': None,
                       'error_code': error_code, 'value': None}
            continue

        headers = _get_sheet_headers(sheet)
        for column in sheet_conf.get(COLUMNS):
            field = column[FIELD]
            for step in column.get(VALIDATION, []):
                if step[TYPE] == MANDATORY and field not in headers:
                    yield {'id': None, 'sheet': sheet_name, 'field': field,
                           'error_code': step[ERROR_CODE], 'value': None}


def _get_sheet_headers(sheet):
    first_row = next(sheet.iter_rows(min_row=1, max_row=1))
    return [c.value for c in first_row]


def _get_values_from_columns(workbook, sheet_name, columns):
    indexed_values = {}
    for row in workbook_sheet_reader(workbook, sheet_name):
        for col in columns:
            indexed_values[str(row.get(col))] = ""

    return indexed_values


def get_all_crossrefs(workbook, cross_refs_names):
    crossrefs = {}
    for ref_name, columns in cross_refs_names.items():
        if columns:
            crossrefs[ref_name] = _get_values_from_columns(workbook, ref_name,
                                                           columns)
        else:
            try:
                crossrefs[ref_name] = get_all_cell_data_from_sheet(workbook, ref_name)
            except ValueError as error:
                if 'sheet is missing' in str(error):
                    crossrefs[ref_name] = []
                else:
                    raise

    return crossrefs


def get_all_in_memory_sheet(workbook, in_memory_sheet_conf):
    in_memory_sheets = {}
    for sheet_conf in in_memory_sheet_conf:
        sheet_name = sheet_conf['sheet_name']
        indexed_by = sheet_conf['indexed_by']
        rows = workbook_sheet_reader(workbook, sheet_name)
        indexed_rows = {row[indexed_by]: row for row in rows}
        in_memory_sheets[sheet_name] = indexed_rows

    return in_memory_sheets


def validate_content(workbook, validation_conf, crossrefs, in_memory_sheets):
    for sheet_name in validation_conf.keys():
        sheet_conf = validation_conf[sheet_name]
        sheet_id_column = sheet_conf['id_field']
        shown_values = {}
        row_validation_steps = sheet_conf.get(ROW_VALIDATION, None)
        for row in workbook_sheet_reader(workbook, sheet_name):
            id_ = row.get(sheet_id_column, None)
            if id_ is None:
                error_code = _get_missing_row_id_error(sheet_id_column,
                                                       sheet_conf)
                yield {'id': id_, 'sheet': sheet_name,
                       'field': sheet_id_column,
                       'error_code': error_code, 'value': None}
                continue
            do_have_cell_error = False
            for column in sheet_conf[COLUMNS]:
                label = column[FIELD]
                validation_steps = column.get(VALIDATION, None)
                value = row.get(label, None)
                if validation_steps:
                    error_code = validate_cell(value, validation_steps,
                                               crossrefs, shown_values, label)
                    if error_code is not None:
                        do_have_cell_error = True
                        yield {'id': id_, 'sheet': sheet_name, 'field': label,
                               'error_code': error_code, 'value': value}

            if not do_have_cell_error and row_validation_steps:
                error_code = validate_row(
                    row, row_validation_steps, in_memory_sheets)
                if error_code is not None:
                    yield {'id': id_, 'sheet': sheet_name, 'field': 'row',
                           'error_code': error_code, 'value': 'row'}
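
# Shape note (derived from the yields above): every detected problem is a
# plain dict with the keys 'id', 'sheet', 'field', 'error_code' and 'value',
# which validate_excel() then wraps into Error instances.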
def _get_missing_row_id_error(sheet_id_column, sheet_conf):
    error_code = None
    for column in sheet_conf[COLUMNS]:
        if column[FIELD] == sheet_id_column:
            error_code = [step[ERROR_CODE]
                          for step in column[VALIDATION] if step[TYPE] == MISSING][0]
    return error_code


def validate_row(row, validation_steps, in_memory_sheets):
    for validation_step in validation_steps:
        kind = validation_step[TYPE]
        error_code = validation_step[ERROR_CODE]
        if kind == NAGOYA:
            if not is_valid_nagoya(row, in_memory_sheets):
                return error_code
        elif kind == BIBLIO:
            if not is_valid_pub(row):
                return error_code
        else:
            msg = f'{kind} is not a recognized row validation type method'
            raise NotImplementedError(msg)


def validate_cell(value, validation_steps, crossrefs, shown_values, label):
    for step_conf in validation_steps:
        if step_conf[TYPE] == MANDATORY:
            continue
        step_conf['crossrefs_pointer'] = crossrefs
        step_conf['shown_values'] = shown_values
        step_conf['label'] = label
        error_code = validate_value(value, step_conf)

        if error_code is not None:
            return error_code


def is_valid_pub(row):
    title = row.get('Title', None)
    full_reference = row.get('Full reference', None)
    authors = row.get('Authors', None)
    journal = row.get('Journal', None)
    year = row.get('Year', None)
    volume = row.get('Volume', None)
    first_page = row.get('First page', None)
    book_title = row.get('Book title', None)
    editors = row.get('Editors', None)
    publisher = row.get('Publisher', None)

    # a filled-in free-text reference is always enough
    if full_reference:
        return True
    is_journal = bool(title)

    # journal articles need authors, journal, year, volume and first page
    if (is_journal and (not authors or not journal or not year or
                        not volume or not first_page)):
        return False
    # books need authors, year, editors, publisher and book title
    if (not is_journal and (not authors or not year or
                            not editors or not publisher or not book_title)):
        return False

    return True
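
# Examples for the bibliography rule above: a row with only 'Full reference'
# filled in passes; a row with a 'Title' is treated as a journal article and
# also needs Authors, Journal, Year, Volume and First page; a row without a
# title is treated as a book and needs Authors, Year, Editors, Publisher and
# Book title.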
def is_valid_nagoya(row, in_memory_sheets):  # sourcery skip: return-identity
    location_index = row.get('Geographic origin', None)
    if location_index is None:
        country = None
    else:
        geo_origin = in_memory_sheets[LOCATIONS].get(location_index, {})
        country = geo_origin.get('Country', None)

    _date = row.get("Date of collection", None)
    if _date is None:
        _date = row.get("Date of isolation", None)
    if _date is None:
        _date = row.get("Date of deposit", None)
    if _date is None:
        _date = row.get("Date of inclusion in the catalogue", None)
    if _date is not None:
        year = _date.year if isinstance(_date, datetime) else int(str(_date)[:4])
    else:
        year = None

    if year is not None and year >= 2014 and country is None:
        return False

    return True
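
# In short: a strain whose first available date (collection, then isolation,
# deposit or catalogue inclusion) is 2014 or later, the year the Nagoya
# Protocol entered into force, must have a country in its geographic origin;
# older or undated records pass.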
def is_valid_regex(value, validation_conf):
    if value is None:
        return True
    value = str(value)
    regexp = validation_conf[MATCH]
    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, None)

    values = [v.strip() for v in value.split(
        separator)] if multiple else [value]

    for value in values:
        matches_regexp = re.fullmatch(regexp, value)
        if not matches_regexp:
            return False
    return True


def is_valid_crossrefs(value, validation_conf):
    crossref_name = validation_conf[CROSSREF_NAME]
    crossrefs = validation_conf['crossrefs_pointer']
    choices = crossrefs[crossref_name]
    if value is None or not choices:
        return True
    value = str(value)

    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, None)
    if multiple:
        values = [v.strip() for v in value.split(separator)]
    else:
        values = [value.strip()]

    return all(value in choices for value in values)


def is_valid_choices(value, validation_conf):
    if value is None:
        return True
    choices = validation_conf[VALUES]
    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, None)

    if multiple:
        values = [v.strip() for v in str(value).split(separator)]
    else:
        values = [str(value).strip()]

    return all(value in choices for value in values)


def is_valid_date(value, validation_conf):
    if value is None:
        return True
    if isinstance(value, datetime):
        year = value.year
        month = value.month
        day = value.day
    elif isinstance(value, int):
        year = value
        month = None
        day = None
    elif isinstance(value, str):
        value = value.replace('-', '')
        value = value.replace('/', '')
        month = None
        day = None
        try:
            year = int(value[:4])
            if len(value) >= 6:
                month = int(value[4:6])
            if len(value) >= 8:
                day = int(value[6:8])

        except (IndexError, TypeError, ValueError):
            return False
    else:
        return False

    if year < 1700 or year > datetime.now().year:
        return False
    if month is not None:
        if month < 1 or month > 12:
            return False
        if day is not None and (day < 1 or day > monthrange(year, month)[1]):
            return False
    return True
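
# Examples for the date rule above: '2001-03-21', '20010321', '200103' and
# the integer 2001 are all accepted; '20011341' fails (month 13), as do years
# before 1700 or later than the current year.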
def is_valid_coords(value, validation_conf=None):
    # sourcery skip: return-identity
    if value is None:
        return True
    try:
        items = [i.strip() for i in value.split(";")]
        latitude = float(items[0])
        longitude = float(items[1])
        if len(items) > 2:
            # the optional third value (precision) only has to parse as a number
            float(items[2])
        if latitude < -90 or latitude > 90:
            return False
        if longitude < -180 or longitude > 180:
            return False
        return True
    except (AttributeError, IndexError, ValueError):
        return False
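
# Example coordinate values for the check above: '40.4168; -3.7038' and
# '40.4168; -3.7038; 10' pass, while '100; 0' fails the latitude range and a
# single number fails because a longitude is required.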
def is_valid_missing(value, validation_conf=None):
    return value is not None


def is_valid_number(value, validation_conf):
    if value is None:
        return True
    try:
        value = float(value)
    except (TypeError, ValueError):
        return False

    _max = validation_conf.get('max', None)
    _min = validation_conf.get('min', None)
    if (_max is not None and value > _max) or (_min is not None and value < _min):
        return False

    return True


def is_valid_taxon(value, validation_conf=None):
    if value is None:
        return True
    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, ';')

    value = value.split(separator) if multiple else [value]
    for taxon in value:
        taxon = taxon.strip()
        if not _is_valid_taxon(taxon):
            return False
    return True


def _is_valid_taxon(value):
    value = value.strip()
    if not value:
        return True

    items = re.split(r" +", value)
    genus = items[0]

    if len(items) > 1:
        species = items[1]
        if species in ("sp", "spp", ".sp", "sp."):
            return False

    if len(items) > 2:
        for index in range(0, len(items[2:]), 2):
            rank = SUBTAXAS.get(items[index + 2], None)
            if rank is None:
                return False

    return True
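
# Examples for the taxon rule above: 'Aspergillus niger' passes, while
# 'Aspergillus sp.' is rejected (placeholder epithets are not accepted) and
# any subtaxa pair whose rank marker is not a SUBTAXAS key also fails.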
def is_valid_unique(value, validation_conf):
    label = validation_conf['label']
    shown_values = validation_conf['shown_values']
    if label not in shown_values:
        shown_values[label] = {}

    already_in_file = shown_values[label]
    if value in already_in_file:
        return False

    # remember the value so a later occurrence in the same file fails the check
    shown_values[label][value] = None

    return True


def is_valid_file(path):
    try:
        with path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)
            if "EXL" in error_log.get_errors():
                return False
    except Exception:
        return False

    return True


VALIDATION_FUNCTIONS = {
    MISSING: is_valid_missing,
    REGEXP: is_valid_regex,
    CHOICES: is_valid_choices,
    CROSSREF: is_valid_crossrefs,
    DATE: is_valid_date,
    COORDINATES: is_valid_coords,
    NUMBER: is_valid_number,
    TAXON: is_valid_taxon,
    UNIQUE: is_valid_unique}


def validate_value(value, step_conf):
    kind = step_conf[TYPE]
    try:
        is_value_valid = VALIDATION_FUNCTIONS[kind]
    except KeyError:
        msg = f'This validation type {kind} is not implemented'
        raise NotImplementedError(msg)

    error_code = step_conf[ERROR_CODE]

    if not is_value_valid(value, step_conf):
        return error_code
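
# Dispatch sketch: validate_value() picks the checker through the step's TYPE
# tag, so a configuration step such as
#     {TYPE: NUMBER, 'max': 8000, 'min': -200, ERROR_CODE: 'STD40'}
# routes to is_valid_number() and yields 'STD40' when the check fails, or
# None when the value is acceptable.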
24
mirri/validation/tags.py
Normal file
@@ -0,0 +1,24 @@
MANDATORY = "mandatory"
REGEXP = "regexp"
CHOICES = "choices"
CROSSREF = 'crossref'
CROSSREF_NAME = 'crossref_name'
MISSING = "missing"
VALIDATION = 'validation'
ERROR_CODE = 'error_code'
FIELD = 'field'
MULTIPLE = 'multiple'
TYPE = 'type'
COLUMNS = 'columns'
SOURCE = "sources"
SEPARATOR = "separator"
MATCH = 'match'
VALUES = 'values'
DATE = 'date'
COORDINATES = 'coord'
NUMBER = 'number'
TAXON = 'taxon'
UNIQUE = 'unique'
ROW_VALIDATION = 'row_validation'
NAGOYA = 'nagoya'
BIBLIO = 'bibliography'
548
mirri/validation/validation_conf_20200601.py
Normal file
@@ -0,0 +1,548 @@
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
                                   ERROR_CODE, FIELD, MANDATORY, MATCH,
                                   MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
                                   UNIQUE,
                                   VALIDATION, VALUES, BIBLIO)
from mirri.settings import (GEOGRAPHIC_ORIGIN, ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
                            STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET)
# MARKERS,
# SEXUAL_STATE_SHEET,
# RESOURCE_TYPES_VALUES,
# FORM_OF_SUPPLY_SHEET,
# PLOIDY_SHEET)


STRAIN_FIELDS = [
    {
        FIELD: "Accession number",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: 'STD01'},
            {TYPE: UNIQUE, ERROR_CODE: 'STD03'},
            {TYPE: MISSING, ERROR_CODE: "STD02"},
            {TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD04"}
        ]
    },
    {
        FIELD: "Restrictions on use",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD05"},
            {TYPE: MISSING, ERROR_CODE: "STD06"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3"],
             MULTIPLE: False, ERROR_CODE: "STD07"}
        ]
    },
    {
        FIELD: "Nagoya protocol restrictions and compliance conditions",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD08"},
            {TYPE: MISSING, ERROR_CODE: "STD09"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3"],
             MULTIPLE: False, ERROR_CODE: "STD10"}
        ]
    },
    {
        FIELD: "ABS related files",
        VALIDATION: [],
    },
    {
        FIELD: "MTA file",
        VALIDATION: [],
    },
    {
        FIELD: "Other culture collection numbers",
        # VALIDATION: [
        #     {TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD07",
        #      MULTIPLE: True, SEPARATOR: ";"}
        # ]
    },
    {
        FIELD: "Strain from a registered collection",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD11"}
        ]
    },
    {
        FIELD: "Risk Group",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD12"},
            {TYPE: MISSING, ERROR_CODE: "STD13"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"],
             MULTIPLE: False, ERROR_CODE: "STD14"}
        ]
    },
    {
        FIELD: "Dual use",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD15"}
        ]
    },
    {
        FIELD: "Quarantine in Europe",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD16"}
        ]
    },
    {
        FIELD: "Organism type",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD17"},
            {TYPE: MISSING, ERROR_CODE: "STD18"},
            {TYPE: CHOICES, VALUES: ["Algae", "Archaea", "Bacteria",
                                     "Cyanobacteria", "Filamentous Fungi",
                                     "Phage", "Plasmid", "Virus", "Yeast",
                                     "1", "2", "3", "4", "5", "6", "7", "8", "9"],
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD19"}
        ]
    },
    {
        FIELD: "Taxon name",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD20"},
            {TYPE: MISSING, ERROR_CODE: "STD21"},
            {TYPE: TAXON, ERROR_CODE: "STD22", MULTIPLE: True,
             SEPARATOR: ';'}
        ]
    },
    {
        FIELD: "Infrasubspecific names",
    },
    {
        FIELD: "Comment on taxonomy",
    },
    {
        FIELD: "Interspecific hybrid",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD23"}
        ]
    },
    {
        FIELD: "Status",
    },
    {
        FIELD: "History of deposit",
        VALIDATION: [
            # {TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD24",  # modify the regex
            #  MULTIPLE: True, SEPARATOR: ";"}
        ]
    },
    {
        FIELD: "Depositor"
    },
    {
        FIELD: "Date of deposit",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD25"},
        ]
    },
    {
        FIELD: "Date of inclusion in the catalogue",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD26"},
        ]
    },
    {
        FIELD: "Collected by",
    },
    {
        FIELD: "Date of collection",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD27"},
        ]
    },
    {
        FIELD: "Isolated by",
    },
    {
        FIELD: "Date of isolation",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD28"},
        ]
    },
    {
        FIELD: "Substrate/host of isolation",
    },
    {
        FIELD: "Tested temperature growth range",
        VALIDATION: [
            {TYPE: REGEXP, MATCH: r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
             ERROR_CODE: "STD29", MULTIPLE: True, SEPARATOR: ";"}
        ]
    },
    {
        FIELD: "Recommended growth temperature",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD30"},
            {TYPE: MISSING, ERROR_CODE: "STD31"},
            {TYPE: REGEXP, MATCH: r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
             ERROR_CODE: "STD32",
             MULTIPLE: True, SEPARATOR: ";"}
        ]
    },
    {
        FIELD: "Recommended medium for growth",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD33"},
            {TYPE: MISSING, ERROR_CODE: "STD34"},
            {TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
             MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"}
        ]
    },
    {
        FIELD: "Form of supply",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD36"},
            {TYPE: MISSING, ERROR_CODE: "STD37"},
            {TYPE: CHOICES, VALUES: ['Agar', 'Cryo', 'Dry Ice', 'Liquid Culture Medium',
                                     'Lyo', 'Oil', 'Water'],
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD38"}
        ]
    },
    {
        FIELD: "Other denomination",
    },
    {
        FIELD: "Coordinates of geographic origin",
        VALIDATION: [
            {TYPE: COORDINATES, ERROR_CODE: "STD39"},
        ]
    },
    {
        FIELD: "Altitude of geographic origin",
        VALIDATION: [
            {TYPE: NUMBER, 'max': 8000, 'min': -200, ERROR_CODE: "STD40"},
        ]
    },
    {
        # value can be in the cell or in another sheet. Don't configure this
        FIELD: "Geographic origin",
    },
    {
        FIELD: "Isolation habitat",
    },
    {
        FIELD: "Ontobiotope term for the isolation habitat",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: "Ontobiotope",
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD41"}
        ]
    },
    {
        FIELD: "GMO",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD42"}
        ]
    },
    {
        FIELD: "GMO construction information",
    },
    {
        FIELD: "Mutant information",
    },
    {
        FIELD: "Genotype",
    },
    {
        FIELD: "Sexual state",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: SEXUAL_STATE_SHEET,
             ERROR_CODE: "STD43"}
            # {TYPE: CHOICES, VALUES: ["Mata", "Matalpha", "Mata/Matalpha",
            #                          "Matb", "Mata/Matb", "MTLa", "MTLalpha", "MTLa/MTLalpha",
            #                          "MAT1-1", "MAT1-2", "MAT1", "MAT2", "MT+", "MT-"],
            #  ERROR_CODE: "STD43"}
        ]
    },
    {
        FIELD: "Ploidy",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["0", "1", "2", "3", "4", "9"],
             ERROR_CODE: "STD44"}
        ]
    },
    {
        FIELD: "Plasmids",
    },
    {
        FIELD: "Plasmids collections fields",
    },
    {
        # value can be in the cell or in another sheet. Don't configure this
        FIELD: "Literature",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: LITERATURE_SHEET,
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD45"}
        ]
    },
    {
        FIELD: "Plant pathogenicity code",
    },
    {
        FIELD: "Pathogenicity",
    },
    {
        FIELD: "Enzyme production",
    },
    {
        FIELD: "Production of metabolites",
    },
    {
        FIELD: "Applications",
    },
    {
        FIELD: "Remarks"
    },
    {
        FIELD: "Literature linked to the sequence/genome",
    },
]

SHEETS_SCHEMA = {
    LOCATIONS: {
        "acronym": "GOD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS02"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD01"},
                    {TYPE: MISSING, ERROR_CODE: "GOD02"},
                ]
            },
            {
                FIELD: "Country",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD03"},
                    {TYPE: MISSING, ERROR_CODE: "GOD04"}
                ]
            },
            {
                FIELD: "Region",
                VALIDATION: []
            },
            {
                FIELD: "City",
                VALIDATION: []
            },
            {
                FIELD: "Locality",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD06"},
                    {TYPE: MISSING, ERROR_CODE: "GOD07"}
                ]
            }
        ],
    },
    GROWTH_MEDIA: {
        "acronym": "GMD",
        "id_field": "Acronym",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS01"},
        COLUMNS: [
            {
                FIELD: "Acronym",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD01"},
                    {TYPE: MISSING, ERROR_CODE: "GMD02"}
                ]
            },
            {
                FIELD: "Description",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD03"},
                    {TYPE: MISSING, ERROR_CODE: "GMD04"}
                ]
            },
            {
                FIELD: "Full description",
                VALIDATION: []
            },
        ],
    },
    GENOMIC_INFO: {
        "acronym": "GID",
        "id_field": "Strain AN",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS08"},
        COLUMNS: [
            {
                FIELD: "Strain AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID01"},
                    {TYPE: MISSING, ERROR_CODE: "GID02"},
                    {TYPE: CROSSREF, CROSSREF_NAME: "Strains",
                     ERROR_CODE: "GID03"},
                ]
            },
            {
                FIELD: "Marker",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID04"},
                    {TYPE: MISSING, ERROR_CODE: "GID05"},
                    {TYPE: CHOICES, ERROR_CODE: "GID06",
                     VALUES: ['16S rRNA', 'ACT', 'CaM', 'EF-1α', 'ITS',
                              'LSU', 'RPB1', 'RPB2', 'TUBB']}
                ]
            },
            {
                FIELD: "INSDC AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID07"},
                    {TYPE: MISSING, ERROR_CODE: "GID08"},
                ]
            },
            {
                FIELD: "Sequence",
                VALIDATION: []
            },
        ],
    },
    STRAINS: {
        "acronym": "STD",
        'id_field': 'Accession number',
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
        ROW_VALIDATION: [
            {TYPE: NAGOYA, ERROR_CODE: "STRXXX"},
        ],
        COLUMNS: STRAIN_FIELDS,
    },
    LITERATURE_SHEET: {
        "acronym": "LID",
        'id_field': 'ID',
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
        ROW_VALIDATION: [
            {TYPE: BIBLIO, ERROR_CODE: 'LID17'}
        ],
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID01"},
                    {TYPE: MISSING, ERROR_CODE: "LID02"},
                ]
            },
            {
                FIELD: "Full reference",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID03"},
                ]
            },
            {
                FIELD: "Authors",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID05"},
                ]
            },
            {
                FIELD: "Title",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID07"},
                ]
            },
            {
                FIELD: "Journal",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID09"},
                ]
            },
            {
                FIELD: "Year",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID11"},
                ]
            },
            {
                FIELD: "Volume",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID13"},
                ]
            },
            {
                FIELD: "Issue",
                VALIDATION: []
            },
            {
                FIELD: "First page",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID15"},
                    {TYPE: MISSING, ERROR_CODE: "LID16"},
                ]
            },
            {
                FIELD: "Last page",
                VALIDATION: []
            },
            {
                FIELD: "Book title",
                VALIDATION: []
            },
            {
                FIELD: "Editors",
                VALIDATION: []
            },
            {
                FIELD: "Publisher",
                VALIDATION: []
            }
        ],
    },
    # SEXUAL_STATE_SHEET: {"acronym": "SSD", COLUMNS: []},
    # RESOURCE_TYPES_VALUES: {"acronym": "RTD", COLUMNS: []},
    # FORM_OF_SUPPLY_SHEET: {"acronym": "FSD", COLUMNS: []},
    # PLOIDY_SHEET: {"acronym": "PLD", COLUMNS: []},
    ONTOBIOTOPE: {
        "acronym": "OTD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS06"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD01"},
                    {TYPE: MISSING, ERROR_CODE: "OTD02"},
                ]
            },
            {
                FIELD: "Name",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD03"},
                    {TYPE: MISSING, ERROR_CODE: "OTD04"},
                ]
            },
        ]
    },
    # MARKERS: {
    #     "acronym": "MKD",
    #     "id_field": "",
    #     COLUMNS: [
    #         {
    #             FIELD: "Acronym",
    #             VALIDATION: []
    #         },
    #         {
    #             FIELD: "Marker",
    #             VALIDATION: []
    #         },
    #     ],
    # },
}

CROSS_REF_CONF = {
    ONTOBIOTOPE: ['ID', 'Name'],
    LITERATURE_SHEET: ['ID'],
    LOCATIONS: ['Locality'],
    GROWTH_MEDIA: ['Acronym'],
    STRAINS: ["Accession number"],
    SEXUAL_STATE_SHEET: []
}

MIRRI_20200601_VALLIDATION_CONF = {
    'sheet_schema': SHEETS_SCHEMA,
    'cross_ref_conf': CROSS_REF_CONF,
    'keep_sheets_in_memory': [
        {'sheet_name': LOCATIONS, 'indexed_by': 'Locality'}]
}
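
# Extension sketch (not part of the original file): validate_excel() walks
# SHEETS_SCHEMA sheet by sheet, so checking a new column only needs an entry
# appended to the relevant COLUMNS list, for instance
#     {FIELD: 'My column', VALIDATION: [{TYPE: MISSING, ERROR_CODE: 'XXX01'}]}
# ('My column' and 'XXX01' are hypothetical), plus a matching XXX01() message
# method in the error-codes class.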
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
openpyxl
requests
requests_oauthlib
pycountry
deepdiff
35
setup.py
Normal file
@@ -0,0 +1,35 @@
import setuptools
from pathlib import Path
from setuptools import find_packages

with open("README.md", "r") as fh:
    long_description = fh.read()

requirements = [line.strip() for line in open('requirements.txt')]
scripts = [str(f) for f in Path('./bin').glob('*.py')]

setuptools.setup(
    name="Mirri utils",
    version="0.1",
    author="P.Ziarsolo",
    author_email="pziarsolo@gmail.com",
    description="A small library to help deal with MIRRI data",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/pziarsolo/mirri_utils",
    packages=find_packages(),
    package_data={"mirri": ['data/ontobiotopes.csv']},
    # package_dir={"mirri.entities": "mirri.entities"
    #              "mirri.io.parsers": "mirri.io.parsers",
    #              "mirri.io.writers": "mirri.io.writers",
    #              'mirri.validation': 'mirri.vallidation'},
    install_requires=requirements,
    scripts=scripts,
    license="GNU General Public License v3.0",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
)
0
tests/__init__.py
Normal file
0
tests/biolomics/__init__.py
Normal file
22
tests/biolomics/test_auth_operations.py
Normal file
@@ -0,0 +1,22 @@
import unittest

from mirri.biolomics.remote.rest_client import BiolomicsClient
try:
    from mirri.biolomics.secrets import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
except ImportError:
    raise ImportError(
        'You need a secrets.py in the project dir with CLIENT_ID, SECRET_ID, USERNAME, PASSWORD')

from .utils import VERSION, SERVER_URL


class BiolomicsClientAuthTest(unittest.TestCase):

    def test_authentication(self):
        client = BiolomicsClient(SERVER_URL, VERSION, CLIENT_ID, SECRET_ID,
                                 USERNAME, PASSWORD)
        access1 = client.get_access_token()
        access2 = client.get_access_token()
        self.assertIsNotNone(access1)
        self.assertEqual(access1, access2)
62
tests/biolomics/test_growth_medium_operations.py
Normal file
@@ -0,0 +1,62 @@
import unittest

from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS
from mirri.biolomics.serializers.growth_media import GrowthMedium
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from tests.biolomics.utils import SERVER_URL, VERSION


class BiolomicsGrowthMediumClientTest(unittest.TestCase):
    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_media_by_id(self):
        record_id = 101
        growth_medium = self.client.retrieve_by_id('growth_medium', record_id)
        self.assertEqual(growth_medium.record_id, record_id)

        self.assertEqual(growth_medium.record_name, 'MA2PH6')

    def test_retrieve_media_by_name(self):
        record_name = 'MA2PH6'
        record_id = 101
        growth_medium = self.client.retrieve_by_name('growth_medium', record_name)
        self.assertEqual(growth_medium.record_id, record_id)
        self.assertEqual(growth_medium.record_name, record_name)

    def test_create_growth_media(self):
        self.client.start_transaction()
        try:
            growth_medium = GrowthMedium()
            growth_medium.acronym = 'BBB'
            growth_medium.ingredients = 'alkhdflakhf'
            growth_medium.description = 'desc'

            new_growth_medium = self.client.create(GROWTH_MEDIUM_WS, growth_medium)
            print(new_growth_medium.dict())
        finally:
            self.client.rollback()

    def test_update_growth_media(self):
        self.client.start_transaction()
        try:
            growth_medium = GrowthMedium()
            growth_medium.acronym = 'BBB'
            growth_medium.ingredients = 'alkhdflakhf'
            growth_medium.description = 'desc'
            growth_medium.full_description = 'full'
            new_growth_medium = self.client.create(GROWTH_MEDIUM_WS, growth_medium)

            new_growth_medium.full_description = 'full2'
            updated_gm = self.client.update(GROWTH_MEDIUM_WS, new_growth_medium)
            self.assertEqual(updated_gm.full_description, 'full2')

            retrieved = self.client.retrieve_by_id(GROWTH_MEDIUM_WS, new_growth_medium.record_id)
            self.assertEqual(retrieved.full_description, updated_gm.full_description)

        finally:
            self.client.rollback()
46
tests/biolomics/test_literature_operations.py
Normal file
@@ -0,0 +1,46 @@
import unittest
|
||||||
|
|
||||||
|
from .utils import VERSION, SERVER_URL
|
||||||
|
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
|
||||||
|
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS
|
||||||
|
from mirri.entities.publication import Publication
|
||||||
|
|
||||||
|
|
||||||
|
class BiolomicsLiteratureClientTest(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
|
||||||
|
SECRET_ID, USERNAME, PASSWORD)
|
||||||
|
|
||||||
|
def test_retrieve_biblio_by_id(self):
|
||||||
|
record_id = 100
|
||||||
|
record_name = "Miscellaneous notes on Mucoraceae"
|
||||||
|
biblio = self.client.retrieve_by_id(BIBLIOGRAPHY_WS, record_id)
|
||||||
|
self.assertEqual(biblio.record_id, record_id)
|
||||||
|
|
||||||
|
self.assertEqual(biblio.record_name, record_name)
|
||||||
|
|
||||||
|
def test_retrieve_media_by_id(self):
|
||||||
|
record_id = 100
|
||||||
|
record_name = "Miscellaneous notes on Mucoraceae"
|
||||||
|
biblio = self.client.retrieve_by_name(BIBLIOGRAPHY_WS, record_name)
|
||||||
|
self.assertEqual(biblio.record_id, record_id)
|
||||||
|
self.assertEqual(biblio.record_name, record_name)
|
||||||
|
self.assertEqual(biblio.year, 1994)
|
||||||
|
self.assertEqual(biblio.volume, '50')
|
||||||
|
|
||||||
|
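
    # Cleanup here is explicit rather than transactional: the record created
    # on the remote service is deleted again in the finally block.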
    def test_create_biblio(self):
        pub = Publication()
        pub.pubmed_id = 'PM18192'
        pub.journal = 'my_journal'
        pub.title = 'awesome title'
        pub.authors = 'pasdas, aposjdasd, alsalsfda'
        pub.volume = 'volume 0'
        record_id = None
        try:
            new_pub = self.client.create(BIBLIOGRAPHY_WS, pub)
            record_id = new_pub.record_id
            self.assertEqual(new_pub.title, pub.title)
            self.assertEqual(new_pub.volume, pub.volume)
        finally:
            if record_id is not None:
                self.client.delete_by_id(BIBLIOGRAPHY_WS, record_id)
49
tests/biolomics/test_sequence_operations.py
Normal file
@ -0,0 +1,49 @@
import unittest

from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from .utils import VERSION, SERVER_URL


class BiolomicsSequenceClientTest(unittest.TestCase):
    def setUp(self) -> None:
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_seq_by_id(self):
        record_id = 101
        sequence = self.client.retrieve_by_id('sequence', record_id)

        self.assertEqual(sequence.record_id, record_id)
        self.assertEqual(sequence.record_name, 'MUM 02.54 - CaM')
        self.assertEqual(sequence.marker_type, 'CaM')

    def test_retrieve_seq_by_name(self):
        record_name = 'MUM 02.54 - CaM'
        sequence = self.client.retrieve_by_name('sequence', record_name)

        self.assertEqual(sequence.record_id, 101)
        self.assertEqual(sequence.record_name, record_name)
        self.assertEqual(sequence.marker_type, 'CaM')
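
    # Round trip against the 'sequence' endpoint: create a marker, check the
    # returned record field by field, then delete it by its new record id.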
    def test_create_delete_sequence(self):
        marker = GenomicSequenceBiolomics()
        marker.marker_id = 'GGAAUUA'
        marker.marker_seq = 'aattgacgat'
        marker.marker_type = 'CaM'
        marker.record_name = 'peioMarker'

        new_marker = self.client.create('sequence', marker)
        self.assertEqual(new_marker.marker_id, 'GGAAUUA')
        self.assertEqual(new_marker.marker_seq, 'aattgacgat')
        self.assertEqual(new_marker.marker_type, 'CaM')
        self.assertEqual(new_marker.record_name, 'peioMarker')
        self.assertTrue(new_marker.record_id)

        self.client.delete_by_id('sequence', new_marker.record_id)


if __name__ == "__main__":
    # import sys;sys.argv = ['', 'BiolomicsClient.Test.test_get_strain_by_id']
    unittest.main()
727
tests/biolomics/test_serializers.py
Normal file
@ -0,0 +1,727 @@
import unittest
import pycountry
import deepdiff
from pprint import pprint

from mirri.biolomics.serializers.sequence import (
    GenomicSequenceBiolomics,
    serialize_to_biolomics as sequence_to_biolomics,
    serialize_from_biolomics as sequence_from_biolomics)

from mirri.biolomics.serializers.strain import (
    serialize_to_biolomics as strain_to_biolomics,
    serialize_from_biolomics as strain_from_biolomics)
from mirri.biolomics.serializers.growth_media import (
    # serialize_to_biolomics as growth_medium_to_biolomics,
    serialize_from_biolomics as growth_medium_from_biolomics)
from mirri.biolomics.serializers.bibliography import (
    serializer_from_biolomics as literature_from_biolomics,
    serializer_to_biolomics as literature_to_biolomics
)
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.entities.publication import Publication
from .utils import create_full_data_strain, VERSION, SERVER_URL
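

# A strain record as returned by the Biolomics web service, captured as the
# fixture for the deserialization test below (numeric FieldType codes, record
# links resolved to RecordId entries).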
STRAIN_WS = {
    'CreationDate': '2021-05-19T12:22:33',
    'CreatorUserName': 'pziarsolo@cect.org',
    'LastChangeDate': '2021-05-19T12:22:36',
    'LastChangeUserName': 'pziarsolo@cect.org',
    'RecordDetails': {
        'ABS related files': {'FieldType': 21,
                              'Value': [{'Name': 'link',
                                         'Value': 'https://example.com'}]},
        'Altitude of geographic origin': {'FieldType': 4, 'Value': 121.0},
        'Applications': {'FieldType': 5, 'Value': 'health'},
        'Catalog URL': {'FieldType': 21, 'Value': []},
        'Collection accession number': {'FieldType': 5, 'Value': 'TESTCC 1'},
        'Collection date': {'FieldType': 8, 'Value': '1991/01/01'},
        'Collector': {'FieldType': 5, 'Value': 'the collector'},
        'Comment on taxonomy': {'FieldType': 5, 'Value': 'lalalalla'},
        'Coordinates of geographic origin': {'FieldType': 12,
                                             'Value': {'Altitude': 0.0,
                                                       'Latitude': 23.3,
                                                       'Longitude': 23.3,
                                                       'Precision': 0.0}},
        'Country': {'FieldType': 118,
                    'Value': [{'Name': {'FieldType': 5, 'Value': 'Spain'},
                               'RecordId': 54,
                               'TargetFieldValue': None}]},
        'Data provided by': {'FieldType': 22, 'Value': 'Unknown'},
        'Date of inclusion in the catalogue': {'FieldType': 8,
                                               'Value': '1985/05/02'},
        'Deposit date': {'FieldType': 8, 'Value': '1985/05/02'},
        'Depositor': {'FieldType': 5,
                      'Value': 'NCTC, National Collection of Type Cultures - '
                               'NCTC, London, United Kingdom of Great Britain '
                               'and Northern Ireland.'},
        'Dual use': {'FieldType': 20, 'Value': 'yes'},
        'Enzyme production': {'FieldType': 5, 'Value': 'some enzimes'},
        'Form': {'FieldType': 3,
                 'Value': [{'Name': 'Agar', 'Value': 'yes'},
                           {'Name': 'Cryo', 'Value': 'no'},
                           {'Name': 'Dry Ice', 'Value': 'no'},
                           {'Name': 'Liquid Culture Medium', 'Value': 'no'},
                           {'Name': 'Lyo', 'Value': 'yes'},
                           {'Name': 'Oil', 'Value': 'no'},
                           {'Name': 'Water', 'Value': 'no'}]},
        'GMO': {'FieldType': 22, 'Value': 'Yes'},
        'GMO construction information': {'FieldType': 5,
                                         'Value': 'instructrion to build'},
        'Genotype': {'FieldType': 5, 'Value': 'some genotupe'},
        'Geographic origin': {'FieldType': 5,
                              'Value': 'una state; one municipality; '
                                       'somewhere in the world'},
        'History': {'FieldType': 5, 'Value': 'newer < In the middle < older'},
        'Infrasubspecific names': {'FieldType': 5, 'Value': 'serovar tete'},
        'Interspecific hybrid': {'FieldType': 20, 'Value': 'no'},
        'Isolation date': {'FieldType': 8, 'Value': '1900/01/01'},
        'Isolation habitat': {'FieldType': 5, 'Value': 'some habitat'},
        'Isolator': {'FieldType': 5, 'Value': 'the isolator'},
        'Literature': {'FieldType': 118, 'Value': []},
        'MTA files URL': {'FieldType': 21,
                          'Value': [{'Name': 'link',
                                     'Value': 'https://example.com'}]},
        'MTA text': {'FieldType': 5, 'Value': ''},
        'Metabolites production': {'FieldType': 5,
                                   'Value': 'big factory of cheese'},
        'Mutant information': {'FieldType': 5, 'Value': 'x-men'},
        'Nagoya protocol restrictions and compliance conditions': {
            'FieldType': 20,
            'Value': 'no known restrictions under the Nagoya protocol'},
        'Ontobiotope': {'FieldType': 118,
                        'Value': [{'Name': {'FieldType': 5,
                                            'Value': 'anaerobic bioreactor '
                                                     '(OBT:000190)'},
                                   'RecordId': 100,
                                   'TargetFieldValue': None}]},
        'Ontobiotope term for the isolation habitat': {'FieldType': 5,
                                                       'Value': ''},
        'Orders': {'FieldType': 118, 'Value': []},
        'Organism type': {'FieldType': 3,
                          'Value': [{'Name': 'Algae', 'Value': 'no'},
                                    {'Name': 'Archaea', 'Value': 'yes'},
                                    {'Name': 'Bacteria', 'Value': 'no'},
                                    {'Name': 'Cyanobacteria', 'Value': 'no'},
                                    {'Name': 'Filamentous Fungi', 'Value': 'no'},
                                    {'Name': 'Phage', 'Value': 'no'},
                                    {'Name': 'Plasmid', 'Value': 'no'},
                                    {'Name': 'Virus', 'Value': 'no'},
                                    {'Name': 'Yeast', 'Value': 'no'},
                                    {'Name': 'Microalgae', 'Value': '?'}]},
        'Other culture collection numbers': {'FieldType': 5,
                                             'Value': 'aaa a; aaa3 a3'},
        'Other denomination': {'FieldType': 5, 'Value': ''},
        'Pathogenicity': {'FieldType': 5, 'Value': 'illness'},
        'Plasmids': {'FieldType': 5, 'Value': 'asda'},
        'Plasmids collections fields': {'FieldType': 5, 'Value': 'asdasda'},
        'Ploidy': {'FieldType': 20, 'Value': 'Polyploid'},
        'Quarantine in Europe': {'FieldType': 20, 'Value': 'no'},
        'Recommended growth medium': {'FieldType': 118,
                                      'Value': [{'Name': {'FieldType': 5,
                                                          'Value': 'AAA'},
                                                 'RecordId': 1,
                                                 'TargetFieldValue': None}]},
        'Recommended growth temperature': {'FieldType': 19,
                                           'MaxValue': 30.0,
                                           'MinValue': 30.0},
        'Remarks': {'FieldType': 5, 'Value': 'no remarks for me'},
        'Restrictions on use': {'FieldType': 20,
                                'Value': 'no restriction apply'},
        'Risk group': {'FieldType': 20, 'Value': '1'},
        'Sequences 16s': {'FieldType': 114,
                          'Value': [{'Name': {'FieldType': 5,
                                              'Value': 'X76436'},
                                     'RecordId': 50992,
                                     'TargetFieldValue': {'FieldType': 14,
                                                          'Value': {'Sequence': ''}}}]},
        'Sequences 18S rRNA': {'FieldType': 114, 'Value': []},
        'Sequences 23S rRNA': {'FieldType': 114, 'Value': []},
        'Sequences ACT': {'FieldType': 114, 'Value': []},
        'Sequences AmdS': {'FieldType': 114, 'Value': []},
        'Sequences Amds12': {'FieldType': 114, 'Value': []},
        'Sequences Beta tubulin': {'FieldType': 114, 'Value': []},
        'Sequences COX1': {'FieldType': 114, 'Value': []},
        'Sequences COX2': {'FieldType': 114, 'Value': []},
        'Sequences CaM': {'FieldType': 114, 'Value': []},
        'Sequences Cct8': {'FieldType': 114, 'Value': []},
        'Sequences Cit1': {'FieldType': 114, 'Value': []},
        'Sequences CypA': {'FieldType': 114, 'Value': []},
        'Sequences GDP': {'FieldType': 114, 'Value': []},
        'Sequences GPD': {'FieldType': 114, 'Value': []},
        'Sequences Genome': {'FieldType': 114, 'Value': []},
        'Sequences HIS': {'FieldType': 114, 'Value': []},
        'Sequences HSP': {'FieldType': 114, 'Value': []},
        'Sequences IDH': {'FieldType': 114, 'Value': []},
        'Sequences IGS': {'FieldType': 114, 'Value': []},
        'Sequences ITS': {'FieldType': 114, 'Value': []},
        'Sequences LSU': {'FieldType': 114, 'Value': []},
        'Sequences MAT': {'FieldType': 114, 'Value': []},
        'Sequences MAT1': {'FieldType': 114, 'Value': []},
        'Sequences Miscellaneous': {'FieldType': 114, 'Value': []},
        'Sequences NorA': {'FieldType': 114, 'Value': []},
        'Sequences NorB': {'FieldType': 114, 'Value': []},
        'Sequences Omt12': {'FieldType': 114, 'Value': []},
        'Sequences OmtA': {'FieldType': 114, 'Value': []},
        'Sequences PcCYP': {'FieldType': 114, 'Value': []},
        'Sequences PpgA': {'FieldType': 114, 'Value': []},
        'Sequences PreA': {'FieldType': 114, 'Value': []},
        'Sequences PreB': {'FieldType': 114, 'Value': []},
        'Sequences RAPD': {'FieldType': 114, 'Value': []},
        'Sequences RPB1': {'FieldType': 114, 'Value': []},
        'Sequences RPB2': {'FieldType': 114, 'Value': []},
        'Sequences SSU': {'FieldType': 114, 'Value': []},
        'Sequences TEF1a': {'FieldType': 114, 'Value': []},
        'Sequences TEF2': {'FieldType': 114, 'Value': []},
        'Sequences TUB': {'FieldType': 114, 'Value': []},
        'Sequences Tsr1': {'FieldType': 114, 'Value': []},
        'Sequences c16S rRNA': {'FieldType': 114, 'Value': []},
        'Sequences cbhI': {'FieldType': 114, 'Value': []},
        'Sequences mcm7': {'FieldType': 114, 'Value': []},
        'Sequences rbcL': {'FieldType': 114, 'Value': []},
        'Sexual state': {'FieldType': 5, 'Value': 'MT+A'},
        'Status': {'FieldType': 5,
                   'Value': 'type of Bacillus alcalophilus'},
        'Strain from a registered collection': {'FieldType': 20, 'Value': 'no'},
        'Substrate of isolation': {'FieldType': 5, 'Value': 'some substrate'},
        'Taxon name': {'FieldType': 109,
                       'Value': [{'Name': {'FieldType': 5,
                                           'Value': 'Escherichia coli'},
                                  'RecordId': 100004123,
                                  'TargetFieldValue': {
                                      'DesktopInfo': None,
                                      'DesktopInfoHtml': '<b>Current name: </b>'
                                                         '<i>Escherichia coli</i> '
                                                         '(Migula 1895) Castellani '
                                                         'and Chalmers 1919',
                                      'FieldType': 27,
                                      'NewSynFieldInfo': None,
                                      'ObligateSynonymId': 0,
                                      'OriginalSynFieldInfo': None,
                                      'SynInfo': {
                                          'BasionymRecord': {
                                              'NameInfo': '',
                                              'RecordId': 100004123,
                                              'RecordName': '<i>Escherichia coli</i> '
                                                            '(Migula 1895) Castellani '
                                                            'and Chalmers 1919',
                                              'SecondLevelRecords': None},
                                          'CurrentNameRecord': {
                                              'NameInfo': '',
                                              'RecordId': 100004123,
                                              'RecordName': '<i>Escherichia coli</i> '
                                                            '(Migula 1895) Castellani '
                                                            'and Chalmers 1919',
                                              'SecondLevelRecords': None},
                                          'ObligateSynonymRecords': [],
                                          'SelectedRecord': {
                                              'NameInfo': '<i>Escherichia coli</i> '
                                                          '(Migula 1895) Castellani '
                                                          'and Chalmers 1919',
                                              'RecordId': 100004123,
                                              'RecordName': '<i>Escherichia coli</i> '
                                                            '(Migula 1895) Castellani '
                                                            'and Chalmers 1919',
                                              'SecondLevelRecords': None},
                                          'TaxonSynonymsRecords': []},
                                      'SynonymId': 100004123}}]},
        'Tested temperature growth range': {'FieldType': 19,
                                            'MaxValue': 32.0,
                                            'MinValue': 29.0},
        'Type description': {'FieldType': 5, 'Value': ''}},
    'RecordId': 148038,
    'RecordName': 'MIRRI 2240561'}
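

# Expected output of strain_to_biolomics when no client is passed: FieldType
# values are the serializer's symbolic codes, and record-link fields that need
# a remote lookup (Country, Literature, the Sequences fields) are absent.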
STRAIN_WS_EXPECTED_NO_REMOTE = {
    'Acronym': 'MIRRI',
    'RecordDetails': {
        'ABS related files': {'FieldType': 'U',
                              'Value': [{'Name': 'link',
                                         'Value': 'https://example.com'}]},
        'Altitude of geographic origin': {'FieldType': 'D', 'Value': 121},
        'Applications': {'FieldType': 'E', 'Value': 'health'},
        'Collection accession number': {'FieldType': 'E', 'Value': 'TESTCC 1'},
        'Collection date': {'FieldType': 'H', 'Value': '1991-01-01'},
        'Collector': {'FieldType': 'E', 'Value': 'the collector'},
        'Comment on taxonomy': {'FieldType': 'E', 'Value': 'lalalalla'},
        'Coordinates of geographic origin': {'FieldType': 'L',
                                             'Value': {'Latitude': 23.3,
                                                       'Longitude': 23.3}},
        'Date of inclusion in the catalogue': {'FieldType': 'H',
                                               'Value': '1985-05-02'},
        'Deposit date': {'FieldType': 'H', 'Value': '1985-05-02'},
        'Depositor': {'FieldType': 'E',
                      'Value': 'NCTC, National Collection of Type Cultures - '
                               'NCTC, London, United Kingdom of Great Britain '
                               'and Northern Ireland.'},
        'Dual use': {'FieldType': 'T', 'Value': 'yes'},
        'Enzyme production': {'FieldType': 'E', 'Value': 'some enzimes'},
        'Form': {'FieldType': 'C',
                 'Value': [{'Name': 'Agar', 'Value': 'yes'},
                           {'Name': 'Cryo', 'Value': 'no'},
                           {'Name': 'Dry Ice', 'Value': 'no'},
                           {'Name': 'Liquid Culture Medium', 'Value': 'no'},
                           {'Name': 'Lyo', 'Value': 'yes'},
                           {'Name': 'Oil', 'Value': 'no'},
                           {'Name': 'Water', 'Value': 'no'}]},
        'GMO': {'FieldType': 'V', 'Value': 'Yes'},
        'GMO construction information': {'FieldType': 'E',
                                         'Value': 'instructrion to build'},
        'Genotype': {'FieldType': 'E', 'Value': 'some genotupe'},
        'Geographic origin': {'FieldType': 'E',
                              'Value': 'una state; one municipality; '
                                       'somewhere in the world'},
        'History': {'FieldType': 'E',
                    'Value': 'firstplave < seconn place < third place'},
        'Infrasubspecific names': {'FieldType': 'E', 'Value': 'serovar tete'},
        'Interspecific hybrid': {'FieldType': 'T', 'Value': 'no'},
        'Isolation date': {'FieldType': 'H', 'Value': '1900-01-01'},
        'Isolation habitat': {'FieldType': 'E', 'Value': 'some habitat'},
        'Isolator': {'FieldType': 'E', 'Value': 'the isolator'},
        'MTA files URL': {'FieldType': 'U',
                          'Value': [{'Name': 'link',
                                     'Value': 'https://example.com'}]},
        'Metabolites production': {'FieldType': 'E',
                                   'Value': 'big factory of cheese'},
        'Mutant information': {'FieldType': 'E', 'Value': 'x-men'},
        'Nagoya protocol restrictions and compliance conditions': {
            'FieldType': 'T',
            'Value': 'no known restrictions under the Nagoya protocol'},
        'Ontobiotope': {'FieldType': 'RLink', 'Value': 'OBT:000190'},
        'Organism type': {'FieldType': 'C',
                          'Value': [{'Name': 'Algae', 'Value': 'no'},
                                    {'Name': 'Archaea', 'Value': 'yes'},
                                    {'Name': 'Bacteria', 'Value': 'no'},
                                    {'Name': 'Cyanobacteria', 'Value': 'no'},
                                    {'Name': 'Filamentous Fungi', 'Value': 'no'},
                                    {'Name': 'Phage', 'Value': 'no'},
                                    {'Name': 'Plasmid', 'Value': 'no'},
                                    {'Name': 'Virus', 'Value': 'no'},
                                    {'Name': 'Yeast', 'Value': 'no'}]},
        'Other culture collection numbers': {'FieldType': 'E',
                                             'Value': 'aaa a; aaa3 a3'},
        'Pathogenicity': {'FieldType': 'E', 'Value': 'illness'},
        'Plasmids': {'FieldType': 'E', 'Value': 'asda'},
        'Plasmids collections fields': {'FieldType': 'E', 'Value': 'asdasda'},
        'Ploidy': {'FieldType': 'T', 'Value': 'Polyploid'},
        'Quarantine in Europe': {'FieldType': 'T', 'Value': 'no'},
        'Recommended growth temperature': {'FieldType': 'S',
                                           'MaxValue': 30.0,
                                           'MinValue': 30.0},
        'Remarks': {'FieldType': 'E', 'Value': 'no remarks for me'},
        'Restrictions on use': {'FieldType': 'T',
                                'Value': 'no restriction apply'},
        'Risk group': {'FieldType': 'T', 'Value': '1'},
        'Sexual state': {'FieldType': 'E', 'Value': 'MT+A'},
        'Status': {'FieldType': 'E',
                   'Value': 'type of Bacillus alcalophilus'},
        'Strain from a registered collection': {'FieldType': 'T', 'Value': 'no'},
        'Substrate of isolation': {'FieldType': 'E', 'Value': 'some substrate'},
        'Taxon name': {'FieldType': 'SynLink', 'Value': 'Escherichia coli'},
        'Tested temperature growth range': {'FieldType': 'S',
                                            'MaxValue': 32.0,
                                            'MinValue': 29.0}}}

class StrainSerializerTest(unittest.TestCase):

    def test_serialize_to_biolomics(self):
        strain = create_full_data_strain()
        ws_strain = strain_to_biolomics(strain, client=None)
        self.assertDictEqual(ws_strain, STRAIN_WS_EXPECTED_NO_REMOTE)

    def test_serialize_to_biolomics_remote(self):
        client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                      SECRET_ID, USERNAME, PASSWORD)
        strain = create_full_data_strain()
        marker = GenomicSequenceBiolomics()
        marker.marker_id = "MUM 02.15 - Beta tubulin"
        marker.marker_type = 'TUBB'
        strain.genetics.markers = [marker]
        ws_strain = strain_to_biolomics(strain, client=client)

        self.assertEqual(strain.collect.habitat_ontobiotope,
                         ws_strain['RecordDetails']['Ontobiotope']['Value'][0]['Name']['Value'])
        self.assertEqual(pycountry.countries.get(alpha_3=strain.collect.location.country).name,
                         ws_strain['RecordDetails']['Country']['Value'][0]['Name']['Value'])
        self.assertEqual(strain.publications[0].title,
                         ws_strain['RecordDetails']['Literature']['Value'][0]['Name']['Value'])
        self.assertEqual(strain.genetics.markers[0].marker_id,
                         ws_strain['RecordDetails']['Sequences TUB']['Value'][0]['Name']['Value'])

    def test_serialize_from_biolomics(self):
        ws_strain = STRAIN_WS
        strain = strain_from_biolomics(ws_strain)
        self.assertEqual(strain.record_id, 148038)
        self.assertEqual(strain.record_name, 'MIRRI 2240561')
        self.assertEqual(strain.taxonomy.long_name, 'Escherichia coli')
        self.assertEqual(strain.growth.recommended_media, ['AAA'])
        self.assertEqual(strain.collect.location.altitude, 121)
        self.assertEqual(strain.collect.location.country, 'ESP')
        self.assertEqual(strain.applications, 'health')
        self.assertEqual(strain.id.strain_id, 'TESTCC 1')
        self.assertEqual(strain.collect.date.strfdate, '19910101')
        self.assertEqual(strain.taxonomy.comments, 'lalalalla')
        self.assertEqual(strain.catalog_inclusion_date.strfdate, '19850502')
        self.assertIn('NCTC, National Collection of Type ', strain.deposit.who)
        self.assertTrue(strain.is_potentially_harmful)
        self.assertEqual(strain.form_of_supply, ['Agar', 'Lyo'])
        self.assertTrue(strain.genetics.gmo)
        self.assertEqual(strain.genetics.gmo_construction, 'instructrion to build')
        self.assertEqual(strain.genetics.genotype, 'some genotupe')
        self.assertEqual(strain.history, ['newer', 'In the middle', 'older'])
        self.assertEqual(strain.taxonomy.infrasubspecific_name, 'serovar tete')
        self.assertEqual(strain.isolation.who, 'the isolator')
        self.assertEqual(strain.isolation.date.strfdate, '19000101')
        self.assertEqual(strain.mta_files, ['https://example.com'])
        self.assertEqual(strain.genetics.mutant_info, 'x-men')
        self.assertEqual(strain.collect.habitat_ontobiotope, 'OBT:000190')
        self.assertEqual(strain.taxonomy.organism_type[0].name, 'Archaea')
        self.assertEqual(strain.other_numbers[0].strain_id, 'aaa a')
        self.assertEqual(strain.other_numbers[1].strain_id, 'aaa3 a3')
        self.assertEqual(strain.pathogenicity, 'illness')
        self.assertEqual(strain.genetics.plasmids, ['asda'])
        self.assertEqual(strain.genetics.ploidy, 9)
        self.assertFalse(strain.is_subject_to_quarantine)
        self.assertEqual(strain.risk_group, '1')
        self.assertFalse(strain.is_from_registered_collection)
        self.assertEqual(strain.growth.tested_temp_range, {'min': 29, 'max': 32})
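

# A genomic-sequence record as returned by the web service; the serializer
# maps 'INSDC number', 'Marker name' and the nested 'DNA sequence' onto the
# marker attributes checked below.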
BIOLOMICSSEQ = {
    'RecordDetails': {
        'Barcode level': {'FieldType': 20, 'Value': 'undefined'},
        'DNA extract number': {'FieldType': 5, 'Value': ''},
        'DNA sequence': {'FieldType': 14,
                         'Value': {'Sequence': 'caaaggaggccttctccctcttcgtaag'}},
        'Editing state': {'FieldType': 20, 'Value': 'Auto import'},
        'Forward primer(s)': {'FieldType': 5, 'Value': ''},
        'Genbank': {'FieldType': 21, 'Value': []},
        'INSDC number': {'FieldType': 5, 'Value': 'AATGAT'},
        'Literature': {'FieldType': 21, 'Value': []},
        'Literature1': {'FieldType': 118, 'Value': []},
        'Marker name': {'FieldType': 5, 'Value': 'CaM'},
        'Privacy': {'FieldType': 20, 'Value': 'undefined'},
        'Quality': {'FieldType': 5, 'Value': ''},
        'Remarks': {'FieldType': 5, 'Value': ''},
        'Reverse primer(s)': {'FieldType': 5, 'Value': ''},
        'Review state': {'FieldType': 5, 'Value': ''},
        'Strain number': {'FieldType': 5, 'Value': 'MUM 02.54'}},
    'RecordId': 101,
    'RecordName': 'MUM 02.54 - CaM'}

class SequenceSerializerTest(unittest.TestCase):

    def test_from_biolomics(self):
        marker = sequence_from_biolomics(BIOLOMICSSEQ)
        self.assertEqual(marker.record_name, BIOLOMICSSEQ['RecordName'])
        self.assertEqual(marker.record_id, BIOLOMICSSEQ['RecordId'])
        self.assertEqual(marker.marker_type, BIOLOMICSSEQ['RecordDetails']['Marker name']['Value'])
        self.assertEqual(marker.marker_id, BIOLOMICSSEQ['RecordDetails']['INSDC number']['Value'])
        self.assertEqual(marker.marker_seq, BIOLOMICSSEQ['RecordDetails']['DNA sequence']['Value']['Sequence'])

    def test_to_biolomics(self):
        marker = GenomicSequenceBiolomics()
        marker.marker_id = 'GGAAUUA'
        marker.marker_seq = 'aattgacgat'
        marker.marker_type = 'CaM'
        marker.record_name = 'peioMarker'
        marker.record_id = 111
        ws_seq = sequence_to_biolomics(marker)
        expected = {'RecordId': marker.record_id,
                    'RecordName': marker.record_name,
                    'RecordDetails': {
                        'INSDC number': {'Value': marker.marker_id, 'FieldType': 'E'},
                        'DNA sequence': {'Value': {'Sequence': marker.marker_seq}, 'FieldType': 'N'},
                        'Marker name': {'Value': marker.marker_type, 'FieldType': 'E'}}}

        self.assertEqual(ws_seq, expected)
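

# A growth-medium record as returned by the web service; the Ingredients
# value keeps the raw tab/newline formatting of the original entry.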
BIOLOMICS_MEDIUM = {
    "RecordId": 100,
    "RecordName": "MA20S",
    "RecordDetails": {
        "Full description": {"Value": "mout agar+20% saccharose",
                             "FieldType": 5},
        "Ingredients": {"Value": "Malt extract\r\n\tDilute brewery malt with water to 10% sugar solution (level 10 on Brix saccharose meter), 15 minutes at 121 C\r\nsaccharose\t200g\r\ndistilled water\t0.6l\r\nagar\t15g\r\n",
                        "FieldType": 5},
        "Link to full description": {"Value": [], "FieldType": 21},
        "Medium description": {"Value": "", "FieldType": 5},
        "Other name": {"Value": "", "FieldType": 5},
        "pH": {"Value": "7 with KOH", "FieldType": 5},
        "Remarks": {"Value": "", "FieldType": 5},
        "Reference": {"Value": "", "FieldType": 5},
        "Sterilization conditions": {"Value": "15 minutes at 121 C",
                                     "FieldType": 5}
    }
}


class MediumSerializerTest(unittest.TestCase):
    def test_from_biolomics(self):
        medium = growth_medium_from_biolomics(BIOLOMICS_MEDIUM)
        self.assertEqual(medium.record_id, BIOLOMICS_MEDIUM['RecordId'])
        self.assertEqual(medium.record_name, BIOLOMICS_MEDIUM['RecordName'])
        self.assertEqual(medium.ingredients, BIOLOMICS_MEDIUM['RecordDetails']['Ingredients']['Value'])
        self.assertEqual(medium.full_description, BIOLOMICS_MEDIUM['RecordDetails']['Full description']['Value'])
        self.assertEqual(medium.ph, BIOLOMICS_MEDIUM['RecordDetails']['pH']['Value'])
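

# A bibliography record as returned by the web service; most text fields are
# empty in this fixture, and the deserialization test only checks the
# populated ones.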
BIOLOMICS_BIBLIOGRAPHY = {
    "RecordId": 100,
    "RecordName": "Miscellaneous notes on Mucoraceae",
    "RecordDetails": {
        "Associated strains": {"Value": [], "FieldType": 118},
        "Associated taxa": {"Value": [], "FieldType": 118},
        "Authors": {"Value": "Schipper, M.A.A.; Samson, R.A.",
                    "FieldType": 5},
        "Associated sequences": {"Value": [], "FieldType": 118},
        "Abstract": {"Value": "", "FieldType": 5},
        "Collection": {"Value": "", "FieldType": 5},
        "DOI number": {"Value": "", "FieldType": 5},
        "Editor(s)": {"Value": "", "FieldType": 5},
        "Full reference": {"Value": "", "FieldType": 5},
        "Hyperlink": {"Value": [], "FieldType": 21},
        "ISBN": {"Value": "", "FieldType": 5},
        "ISSN": {"Value": "", "FieldType": 5},
        "Issue": {"Value": "", "FieldType": 5},
        "Journal": {"Value": "Mycotaxon", "FieldType": 5},
        "Journal-Book": {"Value": "", "FieldType": 5},
        "Keywords": {"Value": "", "FieldType": 5},
        "Page from": {"Value": "475", "FieldType": 5},
        "Page to": {"Value": "491", "FieldType": 5},
        "Publisher": {"Value": "", "FieldType": 5},
        "PubMed ID": {"Value": "", "FieldType": 5},
        "Volume": {"Value": "50", "FieldType": 5},
        "Year": {"Value": 1994, "FieldType": 4}
    }
}


class BibliographySerializerTest(unittest.TestCase):
    def test_from_biolomics(self):
        pub = literature_from_biolomics(BIOLOMICS_BIBLIOGRAPHY)
        self.assertEqual(pub.record_name, "Miscellaneous notes on Mucoraceae")
        self.assertEqual(pub.record_id, 100)
        self.assertEqual(pub.year, 1994)
        self.assertEqual(pub.authors, "Schipper, M.A.A.; Samson, R.A.")

    def test_to_biolomics(self):
        pub = Publication()
        pub.title = 'My title'
        pub.year = 1992
        pub.authors = 'me and myself'
        pub.pubmed_id = '1112222'
        pub.issue = 'issue'
        ws_data = literature_to_biolomics(pub)
        expected = {
            'RecordDetails': {
                'Authors': {'FieldType': 'E', 'Value': 'me and myself'},
                'PubMed ID': {'FieldType': 'E', 'Value': '1112222'},
                'Issue': {'FieldType': 'E', 'Value': 'issue'},
                'Year': {'FieldType': 'D', 'Value': 1992}},
            'RecordName': 'My title'}
        self.assertDictEqual(expected, ws_data)
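
    # With no title set, the serializer is expected to fall back to a
    # RecordName derived from the PubMed id or, failing that, the DOI.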
    def test_to_biolomics2(self):
        pub = Publication()
        pub.pubmed_id = '1112222'
        ws_data = literature_to_biolomics(pub)
        expected = {
            'RecordDetails': {
                'PubMed ID': {'FieldType': 'E', 'Value': '1112222'}},
            'RecordName': f'PUBMED:{pub.pubmed_id}'}
        self.assertDictEqual(expected, ws_data)

        pub = Publication()
        pub.doi = 'doi.er/111/12131'
        ws_data = literature_to_biolomics(pub)
        expected = {
            'RecordDetails': {
                'DOI number': {'FieldType': 'E', 'Value': pub.doi}},
            'RecordName': f'DOI:{pub.doi}'}
        self.assertDictEqual(expected, ws_data)


if __name__ == "__main__":
    # import sys; sys.argv = ['', 'BibliographySerializerTest']
    unittest.main()
156
tests/biolomics/test_strain_operations.py
Normal file
@ -0,0 +1,156 @@
import unittest

from mirri.biolomics.remote.endoint_names import STRAIN_WS
from .utils import VERSION, SERVER_URL, create_full_data_strain
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.pipelines.strain import retrieve_strain_by_accession_number


class BiolomicsStrainClientTest(unittest.TestCase):
    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_strain_by_id(self):
        record_id = 14803
        strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
        self.assertEqual(strain.record_id, record_id)
        print(strain.record_name)

    def test_retrieve_strain_by_name(self):
        record_id = 14803
        record_name = 'MIRRI0014803'
        strain = self.client.retrieve_by_name(STRAIN_WS, record_name)
        self.assertEqual(strain.record_name, record_name)
        self.assertEqual(strain.record_id, record_id)
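
    # Searches use the raw Biolomics query syntax: a list of numbered clauses
    # combined by the "Expression" field (a single exact-match clause, "Q0",
    # in the tests below).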
    def test_search_strain(self):
        accession_number = "BEA 0014B"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)

        self.assertEqual(search_response['total'], 1)
        self.assertEqual(search_response['records'][0].id.strain_id,
                         accession_number)

    def test_search_strain4(self):
        accession_number = "TESTCC 1"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)
        for strain in search_response['records']:
            print(strain)
            self.client.delete_by_id(STRAIN_WS, strain.record_id)

    def test_search_strain_no_found(self):
        accession_number = "BEA 0014B_"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)

        self.assertEqual(search_response['total'], 0)
        self.assertFalse(search_response['records'])

    def test_create_strain(self):
        strain = create_full_data_strain()
        strain.taxonomy.interspecific_hybrid = None
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertIsNone(new_strain.taxonomy.interspecific_hybrid)
            self.assertEqual(new_strain.growth.recommended_media, ['AAA'])
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
        finally:
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_update_strain(self):
        strain = create_full_data_strain()
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
            self.assertFalse(new_strain.taxonomy.interspecific_hybrid)
            new_strain.id.number = '2'
            new_strain.taxonomy.interspecific_hybrid = None
            updated_strain = self.client.update(STRAIN_WS, new_strain)
            self.assertEqual(updated_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(updated_strain.taxonomy.interspecific_hybrid)

            retrieved_strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
            self.assertEqual(retrieved_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(retrieved_strain.taxonomy.interspecific_hybrid)
        finally:
            if record_id is not None:
                print('deleting')
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_update_strain_pathogenicity(self):
        strain = create_full_data_strain()
        print(strain.pathogenicity)
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
            self.assertEqual(new_strain.pathogenicity, 'illness')

            new_strain.pathogenicity = None
            updated_strain = self.client.update(STRAIN_WS, new_strain)
            self.assertEqual(updated_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(updated_strain.pathogenicity)

            retrieved_strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
            self.assertEqual(retrieved_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(retrieved_strain.pathogenicity)
        finally:
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_search_by_accession_number(self):
        accession_number = "BEA 0014B"
        strain = retrieve_strain_by_accession_number(self.client, accession_number)
        self.assertEqual(strain.id.strain_id, accession_number)

    def test_search_by_accession_number_not_found(self):
        accession_number = "BEA 0014B_"
        strain = retrieve_strain_by_accession_number(self.client, accession_number)
        self.assertFalse(strain)


class BiolomicsClientGrowthMediaTest(unittest.TestCase):
    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def xtest_growth_media_by_name(self):
        gm = self.client.retrieve('growth_media', 'AAA')
        self.assertEqual(gm['Record Id'], 1)


if __name__ == "__main__":
    # import sys;sys.argv = ['',
    #                        'BiolomicsWriter.test_mirri_excel_parser_invalid']
    unittest.main()
99
tests/biolomics/utils.py
Normal file
@ -0,0 +1,99 @@
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.strain import StrainId, OrganismType
from mirri.entities.sequence import GenomicSequence
from mirri.entities.date_range import DateRange
from mirri.entities.publication import Publication
from mirri.settings import NAGOYA_NO_RESTRICTIONS

VERSION = 'v2'
SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
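

# Builds a StrainMirri populated with a value for every field the serializer
# and client tests exercise; the string values are deliberately throwaway
# test data.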
def create_full_data_strain():
    strain = StrainMirri()

    strain.id.number = "1"
    strain.id.collection = "TESTCC"
    strain.id.url = "https://cect/2342"

    strain.restriction_on_use = "no_restriction"
    strain.nagoya_protocol = NAGOYA_NO_RESTRICTIONS
    strain.abs_related_files = ['https://example.com']
    strain.mta_files = ['https://example.com']
    strain.other_numbers.append(StrainId(collection="aaa", number="a"))
    strain.other_numbers.append(StrainId(collection="aaa3", number="a3"))
    strain.is_from_registered_collection = False
    strain.risk_group = '1'
    strain.is_potentially_harmful = True
    strain.is_subject_to_quarantine = False

    strain.taxonomy.organism_type = [OrganismType(2)]
    strain.taxonomy.genus = 'Escherichia'
    strain.taxonomy.species = 'coli'
    strain.taxonomy.interspecific_hybrid = False
    strain.taxonomy.infrasubspecific_name = 'serovar tete'
    strain.taxonomy.comments = 'lalalalla'

    strain.status = "type of Bacillus alcalophilus"
    strain.history = 'firstplave < seconn place < third place'

    strain.deposit.who = "NCTC, National Collection of Type Cultures - NCTC, London, United Kingdom of Great Britain and Northern Ireland."
    strain.deposit.date = DateRange(year=1985, month=5, day=2)
    strain.catalog_inclusion_date = DateRange(year=1985, month=5, day=2)

    strain.collect.location.country = "ESP"
    strain.collect.location.state = "una state"
    strain.collect.location.municipality = "one municipality"
    strain.collect.location.longitude = 23.3
    strain.collect.location.latitude = 23.3
    strain.collect.location.altitude = 121
    strain.collect.location.site = "somewhere in the world"
    strain.collect.habitat_ontobiotope = "OBT:000190"
    strain.collect.habitat = 'some habitat'
    strain.collect.who = "the collector"
    strain.collect.date = DateRange(year=1991)

    strain.isolation.date = DateRange(year=1900)
    strain.isolation.who = 'the isolator'
    strain.isolation.substrate_host_of_isolation = 'some substrate'

    # already existing media in test_mirri
    strain.growth.recommended_temp = {'min': 30, 'max': 30}
    strain.growth.recommended_media = ["AAA"]
    strain.growth.tested_temp_range = {'min': 29, 'max': 32}

    strain.form_of_supply = ["Agar", "Lyo"]

    # strain.other_denominations = ["lajdflasjdldj"]

    gen_seq = GenomicSequence()
    gen_seq.marker_id = "pepe"
    gen_seq.marker_type = "16S rRNA"
    strain.genetics.markers.append(gen_seq)
    strain.genetics.ploidy = 9
    strain.genetics.genotype = 'some genotupe'
    strain.genetics.gmo = True
    strain.genetics.gmo_construction = 'instructrion to build'
    strain.genetics.mutant_info = 'x-men'
    strain.genetics.sexual_state = 'MT+A'
    strain.genetics.plasmids = ['asda']
    strain.genetics.plasmids_in_collections = ['asdasda']

    pub = Publication()
    pub.title = "The genus Amylomyces"
    strain.publications = [pub]

    strain.plant_pathogenicity_code = 'PATH:001'
    strain.pathogenicity = 'illness'
    strain.enzyme_production = 'some enzimes'
    strain.production_of_metabolites = 'big factory of cheese'
    strain.applications = 'health'

    strain.remarks = 'no remarks for me'
    return strain


if __name__ == '__main__':
    strain = create_full_data_strain()
    print(strain.collect.habitat_ontobiotope)
BIN
tests/data/invalid_content.mirri.xlsx
Normal file
Binary file not shown.
5
tests/data/invalid_excel.mirri.json
Normal file
@ -0,0 +1,5 @@
{
    "key1": "value1",
    "key2": "value2",
    "key3": "value3"
}
BIN
tests/data/invalid_structure.mirri.xlsx
Normal file
Binary file not shown.
BIN
tests/data/valid.mirri.full.xlsx
Normal file
Binary file not shown.
BIN
tests/data/valid.mirri.xlsx
Normal file
Binary file not shown.
318
tests/test_entities.py
Normal file
@ -0,0 +1,318 @@
"""
Created on Dec. 2, 2020

@author: peio
"""

import unittest

from mirri.entities.publication import Publication
from mirri.entities.date_range import DateRange
from mirri.entities.location import Location
from mirri.entities.sequence import GenomicSequence
from mirri.entities.strain import (
    Collect,
    Deposit,
    Isolation,
    ValidationError,
    OrganismType,
    Strain,
    StrainId,
    Taxonomy,
)
from mirri.settings import (
    COLLECT,
    COUNTRY,
    DATE_OF_ISOLATION,
    DEPOSIT,
    DEPOSITOR,
    GENETICS,
    GROWTH,
    ISOLATED_BY,
    ISOLATION,
    LOCATION,
    MARKERS,
    NAGOYA_DOCS_AVAILABLE,
    NAGOYA_PROTOCOL,
    ORGANISM_TYPE,
    OTHER_CULTURE_NUMBERS,
    PLOIDY,
    RECOMMENDED_GROWTH_MEDIUM,
    TAXONOMY,
    DATE_OF_INCLUSION, NO_RESTRICTION
)
from mirri.validation.entity_validators import validate_strain
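

# DateRange serializes partial dates as fixed-width 8-character strings, with
# '-' padding the unknown month/day positions (e.g. "2012----", "201212--").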
class TestDateRange(unittest.TestCase):
    def test_data_range_init(self):
        dr = DateRange()

        self.assertFalse(dr)

        self.assertEqual(dr.__str__(), "")
        self.assertEqual(dr.range["start"], None)
        self.assertEqual(dr.range["end"], None)

        dr.strpdate("2012")
        self.assertEqual(dr.strfdate, "2012----")
        self.assertTrue(dr)

        dr.strpdate("2012----")
        self.assertEqual(dr.strfdate, "2012----")

        dr.strpdate("201212--")
        self.assertEqual(dr.strfdate, "201212--")
        try:
            dr.strpdate("201213--")
            self.fail()
        except ValueError:
            pass

        try:
            dr = DateRange(year=2012, month=13)
            self.fail()
        except ValueError:
            pass

        dr = DateRange(year=2020)
        self.assertEqual(dr.strfdate, "2020----")

        dr2 = dr.strpdate("2012")
        self.assertEqual(dr2.range["start"].year, 2012)
        self.assertEqual(dr2.range["start"].month, 1)
        self.assertEqual(dr2.range["start"].day, 1)

        self.assertEqual(dr2.range["end"].year, 2012)
        self.assertEqual(dr2.range["end"].month, 12)
        self.assertEqual(dr2.range["end"].day, 31)


class TestCollect(unittest.TestCase):
    def test_collect_basic(self):
        collect = Collect()
        self.assertEqual(collect.dict(), {})

        collect.location.country = "ESP"
        collect.date = DateRange().strpdate("2012----")

        collect.who = "pepito"
        self.assertEqual(
            dict(collect.dict()),
            {
                "location": {"countryOfOriginCode": "ESP"},
                "collected_by": "pepito",
                "date_of_collection": "2012----",
            },
        )
        self.assertEqual(collect.__str__(),
                         "Collected: Spain in 2012---- by pepito")


class TestOrganismType(unittest.TestCase):
    def test_basic_usage(self):
        org_type = OrganismType(2)
        self.assertEqual(org_type.name, "Archaea")
        self.assertEqual(org_type.code, 2)
        try:
            org_type.ko = 'a'
            self.fail()
        except TypeError:
            pass

        org_type = OrganismType("Archaea")


class TestTaxonomy(unittest.TestCase):
    def test_taxonomy_basic(self):
        taxonomy = Taxonomy()
        self.assertEqual(taxonomy.dict(), {})
        self.assertFalse(taxonomy)

    def test_taxonomy_with_data(self):
        taxonomy = Taxonomy()
        taxonomy.genus = "Bacilus"
        taxonomy.organism_type = [OrganismType("Archaea")]
        taxonomy.species = "vulgaris"
        self.assertEqual(taxonomy.long_name, "Bacilus vulgaris")

        # print(taxonomy.dict())


class TestLocation(unittest.TestCase):
    def test_empty_init(self):
        loc = Location()
        self.assertEqual(loc.dict(), {})
        self.assertFalse(loc)

    def test_add_data(self):
        loc = Location()
        loc.country = "esp"
        self.assertEqual(loc.dict(), {COUNTRY: "esp"})
        loc.state = None
        self.assertEqual(loc.dict(), {COUNTRY: "esp"})


class TestStrain(unittest.TestCase):
    def test_empty_strain(self):
        strain = Strain()
        self.assertEqual(strain.dict(), {})

    def test_strain_add_data(self):
        strain = Strain()

        strain.id.number = "5433"
        strain.id.collection = "CECT"
        strain.id.url = "https://cect/2342"

        try:
            strain.nagoya_protocol = "asdas"
            self.fail()
        except ValidationError:
            pass

        strain.nagoya_protocol = NAGOYA_DOCS_AVAILABLE
        self.assertEqual(strain.dict()[NAGOYA_PROTOCOL], NAGOYA_DOCS_AVAILABLE)

        strain.collect.location.country = "ESP"

        self.assertEqual(strain.dict()[COLLECT][LOCATION][COUNTRY], "ESP")

        strain.genetics.ploidy = 9
        self.assertEqual(strain.dict()[GENETICS][PLOIDY], 9)

        strain.growth.recommended_media = ["asd"]
        strain.isolation.date = DateRange(year=1900)
        self.assertEqual(strain.dict()[ISOLATION][DATE_OF_ISOLATION], "1900----")

        strain.deposit.who = "pepe"
        self.assertEqual(strain.dict()[DEPOSIT][DEPOSITOR], "pepe")

        strain.growth.recommended_media = ["11"]
        self.assertEqual(strain.dict()[GROWTH][RECOMMENDED_GROWTH_MEDIUM], ["11"])

        strain.taxonomy.organism_type = [OrganismType(2)]
        self.assertEqual(
            strain.dict()[TAXONOMY][ORGANISM_TYPE],
            [{"code": 2, "name": "Archaea"}]
        )

        strain.taxonomy.organism_type = [OrganismType("Algae")]
        self.assertEqual(
            strain.dict()[TAXONOMY][ORGANISM_TYPE],
            [{"code": 1, "name": "Algae"}]
        )

        strain.other_numbers.append(StrainId(collection="aaa", number="a"))
        strain.other_numbers.append(StrainId(collection="aaa3", number="a3"))
        self.assertEqual(
            strain.dict()[OTHER_CULTURE_NUMBERS],
            [
                {"collection_code": "aaa", "accession_number": "a"},
                {"collection_code": "aaa3", "accession_number": "a3"},
            ],
        )
        strain.form_of_supply = ["Agar", "Lyo"]
        gen_seq = GenomicSequence()
        self.assertEqual(gen_seq.dict(), {})
        gen_seq.marker_id = "pepe"
        gen_seq.marker_type = "16S rRNA"
        strain.genetics.markers.append(gen_seq)
        self.assertEqual(
            strain.dict()[GENETICS][MARKERS],
            [{"marker_type": "16S rRNA", "INSDC": "pepe"}],
        )

        strain.collect.habitat_ontobiotope = "OBT:111111"
        self.assertEqual(strain.collect.habitat_ontobiotope, "OBT:111111")

        try:
            strain.collect.habitat_ontobiotope = "OBT:11111"
            self.fail()
        except ValidationError:
            pass

        # publications
        try:
            strain.publications = 1
            self.fail()
        except ValidationError:
            pass
        pub = Publication()
        pub.id = "1"
        try:
            strain.publications = pub
            self.fail()
        except ValidationError:
            pass

        strain.publications = [pub]
        self.assertEqual(strain.publications[0].id, "1")

        strain.catalog_inclusion_date = DateRange(year=1992)
        self.assertEqual(strain.dict()[DATE_OF_INCLUSION], '1992----')

        import pprint

        pprint.pprint(strain.dict())

    def test_strain_validation(self):
        strain = Strain()
        strain.form_of_supply = ['Lyo']
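
        # NOTE: the early return below disables the validation assertions that
        # follow.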
return
|
||||||
|
|
||||||
|
errors = validate_strain(strain)
|
||||||
|
self.assertEqual(len(errors), 10)
|
||||||
|
|
||||||
|
strain.id.collection = 'test'
|
||||||
|
strain.id.number = '1'
|
||||||
|
|
||||||
|
|
||||||
|
errors = validate_strain(strain)
|
||||||
|
self.assertEqual(len(errors), 9)
|
||||||
|
|
||||||
|
strain.nagoya_protocol = NAGOYA_DOCS_AVAILABLE
|
||||||
|
strain.restriction_on_use = NO_RESTRICTION
|
||||||
|
strain.risk_group = 1
|
||||||
|
strain.taxonomy.organism_type = [OrganismType(4)]
|
||||||
|
strain.taxonomy.hybrids = ['Sac lac', 'Sac lcac3']
|
||||||
|
strain.growth.recommended_media = ['aa']
|
||||||
|
strain.growth.recommended_temp = {'min': 2, 'max':5}
|
||||||
|
strain.form_of_supply = ['lyo']
|
||||||
|
strain.collect.location.country = 'ESP'
|
||||||
|
errors = validate_strain(strain)
|
||||||
|
self.assertFalse(errors)
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsolation(unittest.TestCase):
|
||||||
|
def test_iniatialize_isollation(self):
|
||||||
|
isolation = Isolation()
|
||||||
|
self.assertEqual(isolation.dict(), {})
|
||||||
|
isolation.who = "pepito"
|
||||||
|
self.assertTrue(ISOLATED_BY in isolation.dict())
|
||||||
|
isolation.date = DateRange().strpdate("2012----")
|
||||||
|
self.assertTrue(DATE_OF_ISOLATION in isolation.dict())
|
||||||
|
|
||||||
|
try:
|
||||||
|
isolation.location.site = "spain"
|
||||||
|
self.fail()
|
||||||
|
except (ValueError, AttributeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestGenomicSequence(unittest.TestCase):
|
||||||
|
def test_empty_init(self):
|
||||||
|
gen_seq = GenomicSequence()
|
||||||
|
self.assertEqual(gen_seq.dict(), {})
|
||||||
|
gen_seq.marker_id = "pepe"
|
||||||
|
gen_seq.marker_type = "16S rRNA"
|
||||||
|
self.assertEqual(gen_seq.dict(), {
|
||||||
|
"marker_type": "16S rRNA", "INSDC": "pepe"})
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# import sys;sys.argv = ['', 'TestStrain']
|
||||||
|
unittest.main()
|
||||||
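The '1992----' and '2012----' literals in these tests are the specification's padded partial-date form: YYYYMMDD with the unknown fields dashed out. A sketch of that encoding with a hypothetical helper (not the DateRange API itself):

def encode_partial_date(year=None, month=None, day=None):
    # hypothetical helper: pad the absent fields with dashes,
    # e.g. encode_partial_date(1992) -> "1992----"
    year_str = f"{year:04d}" if year else "----"
    month_str = f"{month:02d}" if month else "--"
    day_str = f"{day:02d}" if day else "--"
    return year_str + month_str + day_str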
51
tests/test_parsers.py
Normal file
51
tests/test_parsers.py
Normal file
@ -0,0 +1,51 @@
import unittest
from pathlib import Path
from pprint import pprint

from mirri.entities.strain import ValidationError
from mirri.io.parsers.mirri_excel import parse_mirri_excel

TEST_DATA_DIR = Path(__file__).parent / "data"


class MirriExcelTests(unittest.TestCase):

    def test_mirri_excel_parser(self):
        in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            parsed_data = parse_mirri_excel(fhand, version="20200601")

        medium = parsed_data["growth_media"][0]
        self.assertEqual(medium.acronym, "1")
        self.assertEqual(medium.description, "NUTRIENT BROTH/AGAR I")

        strains = list(parsed_data["strains"])
        strain = strains[0]
        self.assertEqual(strain.publications[0].id, 1)
        self.assertEqual(strain.publications[0].title, 'Cosa')
        self.assertEqual(strain.id.number, "1")
        pprint(strain.dict())  # debug aid: show the first parsed strain

    # the xtest_ prefix keeps these two tests out of unittest discovery
    def xtest_mirri_excel_parser_invalid_fail(self):
        in_path = TEST_DATA_DIR / "invalid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            with self.assertRaises(ValidationError):
                parse_mirri_excel(fhand, version="20200601")

    def xtest_mirri_excel_parser_invalid(self):
        in_path = TEST_DATA_DIR / "invalid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            parsed_data = parse_mirri_excel(fhand, version="20200601")

        errors = parsed_data["errors"]
        for _id, _errors in errors.items():
            print(_id, _errors)


if __name__ == "__main__":
    # import sys;sys.argv = ['',
    #             'MirriExcelTests.test_mirri_excel_parser_invalid']
    unittest.main()
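A note on the two xtest_-prefixed methods above: renaming hides them from unittest discovery silently. The skip decorator, sketched here on a stand-in test, keeps the disabled test visible in the run report instead:

import unittest


class SkippedExample(unittest.TestCase):
    # unittest.skip reports the test as skipped instead of hiding it entirely
    @unittest.skip("invalid.mirri.xlsx fixture not settled yet")
    def test_mirri_excel_parser_invalid_fail(self):
        self.fail("never runs while the skip decorator is in place")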
589
tests/test_validation.py
Normal file
589
tests/test_validation.py
Normal file
@ -0,0 +1,589 @@
from datetime import datetime
import unittest
from pathlib import Path

from mirri.validation.tags import (
    CHOICES,
    COORDINATES,
    CROSSREF,
    CROSSREF_NAME,
    DATE,
    MATCH,
    MISSING,
    MULTIPLE,
    NUMBER,
    REGEXP,
    SEPARATOR,
    TAXON,
    TYPE,
    UNIQUE,
    VALUES,
)

from mirri.validation.excel_validator import (
    is_valid_choices,
    is_valid_coords,
    is_valid_crossrefs,
    is_valid_date,
    is_valid_missing,
    is_valid_number,
    is_valid_regex,
    is_valid_taxon,
    is_valid_unique,
    is_valid_file,
    validate_mirri_excel,
)


TEST_DATA_DIR = Path(__file__).parent / "data"

# keys of the per-case dicts used by the table-driven tests below
TS_VALUE = "value"
TS_CONF = "conf"
TS_ASSERT = "assert_func"

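Every test in this file follows the same table-driven shape: a list of case dicts keyed by TS_VALUE, TS_CONF and TS_ASSERT, walked in a loop under subTest so one failing case does not mask the rest. The pattern in isolation, with a hypothetical is_valid_x standing in for any of the imported checkers:

import unittest


def is_valid_x(value, conf):
    # hypothetical checker, stands in for any is_valid_* function
    return bool(value)


class TableDrivenExample(unittest.TestCase):
    def test_is_valid_x(self):
        tests = [
            {"value": "abc", "conf": {}, "assert_func": self.assertTrue},
            {"value": "", "conf": {}, "assert_func": self.assertFalse},
        ]
        for test in tests:
            # subTest keeps running after a failure and labels it with the value
            with self.subTest(value=test["value"]):
                test["assert_func"](is_valid_x(test["value"], test["conf"]))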
class MirriExcelValidationTests(unittest.TestCase):

    def test_validation_structure(self):
        in_path = TEST_DATA_DIR / "invalid_structure.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        entities = []
        err_codes = []
        for ett, errors in error_log.get_errors().items():
            entities.append(ett)
            err_codes.extend([err.code for err in errors])

        self.assertIn("EFS", entities)
        self.assertIn("STD", entities)
        self.assertIn("GOD", entities)
        self.assertIn("GMD", entities)

        self.assertIn("EFS03", err_codes)
        self.assertIn("EFS06", err_codes)
        self.assertIn("EFS08", err_codes)
        self.assertIn("GOD06", err_codes)
        self.assertIn("GMD01", err_codes)
        self.assertIn("STD05", err_codes)
        self.assertIn("STD08", err_codes)
        self.assertIn("STD12", err_codes)

    def test_validation_content(self):
        in_path = TEST_DATA_DIR / "invalid_content.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        entities = []
        err_codes = []
        for ett, errors in error_log.get_errors().items():
            entities.append(ett)
            err_codes.extend([err.code for err in errors])

        self.assertGreater(len(err_codes), 0)

        self.assertNotIn("EFS", entities)
        self.assertIn("STD", entities)
        self.assertIn("GOD", entities)
        self.assertIn("GID", entities)

        self.assertIn("GOD04", err_codes)
        self.assertIn("GOD07", err_codes)
        self.assertIn("GID03", err_codes)
        self.assertIn("STD11", err_codes)
        self.assertIn("STD15", err_codes)
        self.assertIn("STD22", err_codes)
        self.assertIn("STD04", err_codes)
        self.assertIn("STD10", err_codes)
        self.assertIn("STD07", err_codes)
        self.assertIn("STD14", err_codes)
        self.assertIn("STD16", err_codes)

    def test_validation_valid(self):
        in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        self.assertEqual(len(error_log.get_errors()), 0)

class ValidationFunctionsTest(unittest.TestCase):

    def test_is_valid_regex(self):
        tests = [
            {
                TS_VALUE: "abcDEF",
                TS_CONF: {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "123456",
                TS_CONF: {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "123456",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\d+"},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "abcdef",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\d+"},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "abc 123",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "123 abc",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "123 ",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_regex(value, conf))

    def test_is_valid_choices(self):
        tests = [
            {
                TS_VALUE: "1",
                TS_CONF: {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "1, 3",
                TS_CONF: {
                    TYPE: CHOICES,
                    VALUES: ["1", "2", "3", "4"],
                    MULTIPLE: True,
                    SEPARATOR: ","
                },
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "5",
                TS_CONF: {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_choices(value, conf))
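The vectors above pin down the choices contract: a bare value must appear in VALUES, and when MULTIPLE is set the value is split on SEPARATOR and every stripped token must appear. A minimal sketch under those assumptions, with plain string keys standing in for the tag constants; the crossref test that follows is the same idea with the allowed values looked up through crossrefs_pointer:

def is_valid_choices_sketch(value, conf):
    # stand-in keys: "values", "multiple" and "separator" play the role of
    # the VALUES, MULTIPLE and SEPARATOR tag constants
    allowed = conf["values"]
    if conf.get("multiple"):
        tokens = [token.strip() for token in str(value).split(conf["separator"])]
    else:
        tokens = [str(value)]
    return all(token in allowed for token in tokens)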
    def test_is_valid_crossref(self):
        tests = [
            {
                TS_VALUE: "abc",
                TS_CONF: {
                    TYPE: CROSSREF,
                    CROSSREF_NAME: "values",
                    "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
                },
                TS_ASSERT: self.assertTrue,
            },
            {
                TS_VALUE: "123",
                TS_CONF: {
                    TYPE: CROSSREF,
                    CROSSREF_NAME: "values",
                    "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
                },
                TS_ASSERT: self.assertFalse,
            },
            {
                TS_VALUE: "abc, def",
                TS_CONF: {
                    TYPE: CROSSREF,
                    CROSSREF_NAME: "values",
                    "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
                    MULTIPLE: True,
                    SEPARATOR: ",",
                },
                TS_ASSERT: self.assertTrue,
            },
            {
                TS_VALUE: "abc, 123",
                TS_CONF: {
                    TYPE: CROSSREF,
                    CROSSREF_NAME: "values",
                    "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
                    MULTIPLE: True,
                    SEPARATOR: ",",
                },
                TS_ASSERT: self.assertFalse,
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_crossrefs(value, conf))
    def test_is_valid_missing(self):
        tests = [
            {
                TS_VALUE: 1,
                TS_CONF: {TYPE: MISSING},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "abc",
                TS_CONF: {TYPE: MISSING},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: None,
                TS_CONF: {TYPE: MISSING},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_missing(value, conf))
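These vectors only separate None from everything else, so the presence check reduces to a None test. A sketch; whether empty strings also count as missing is not pinned down by the vectors, so the real function may differ:

def is_valid_missing_sketch(value, conf):
    # assumption: any non-None value counts as present (1 and "abc" pass, None fails)
    return value is not None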
    def test_is_valid_date(self):
        tests = [
            {
                TS_VALUE: '2020-04-07',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: '2020/04/07',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: datetime(2021, 5, 1),
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: '2020-05',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: '2020/05',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 2020,
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: '2021 05 01',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: '04-07-2020',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: '2021-02-31',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: '2021-15',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: '15-2021',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 3000,
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: -2020,
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_date(value, conf))
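Reading the vectors back: ISO-ordered strings (year first, with - or / separators, day optional), datetime objects and bare years are accepted, while space-separated and day-first strings, impossible dates and implausible years (3000, -2020) are rejected. A sketch under those assumptions; the strptime format list and the exact year window are guesses:

from datetime import datetime


def is_valid_date_sketch(value, conf):
    # assumption: bare years pass only inside a plausible window, which
    # would explain why 2020 passes while 3000 and -2020 fail
    if isinstance(value, datetime):
        return True
    if isinstance(value, int):
        return 1000 <= value <= datetime.now().year
    for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m", "%Y/%m"):
        try:
            datetime.strptime(str(value), fmt)
            return True
        except ValueError:
            continue
    return False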
    def test_is_valid_coordinates(self):
        tests = [
            {
                TS_VALUE: "23; 50",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "-90; -100",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "90; 100",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "0; 0",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "10; 20; 5",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "10; 20; -5",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "91; 50",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "87; 182",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "-200; 182",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "20, 40",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "abc def",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 123,
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_coords(value, conf))
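The accepted shape is a semicolon-separated "latitude; longitude" pair, optionally with a third altitude component of either sign; comma separators and non-strings fail. A sketch with the usual +/-90 and +/-180 bounds, which match every passing and failing vector above:

def is_valid_coords_sketch(value, conf):
    # assumption: "lat; lon" or "lat; lon; altitude", semicolon-separated
    if not isinstance(value, str):
        return False
    parts = [part.strip() for part in value.split(";")]
    if len(parts) not in (2, 3):
        return False
    try:
        numbers = [float(part) for part in parts]
    except ValueError:
        return False
    lat, lon = numbers[0], numbers[1]
    # the altitude component, when present, is accepted with any sign
    return -90 <= lat <= 90 and -180 <= lon <= 180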
    def test_is_valid_number(self):
        tests = [
            {
                TS_VALUE: 1,
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 2.5,
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "10",
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "10.5",
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 5,
                TS_CONF: {TYPE: NUMBER, "min": 0},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 5,
                TS_CONF: {TYPE: NUMBER, "max": 10},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 5,
                TS_CONF: {TYPE: NUMBER, "min": 0, "max": 10},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "hello",
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 10,
                TS_CONF: {TYPE: NUMBER, "max": 5},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 0,
                TS_CONF: {TYPE: NUMBER, "min": 5},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_number(value, conf))
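Numeric strings coerce, and the optional min/max keys bound the result. A minimal sketch consistent with every vector above:

def is_valid_number_sketch(value, conf):
    # coerce numeric strings, then apply the optional min/max bounds
    try:
        number = float(value)
    except (TypeError, ValueError):
        return False
    if "min" in conf and number < conf["min"]:
        return False
    if "max" in conf and number > conf["max"]:
        return False
    return True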
    def test_is_valid_taxon(self):
        tests = [
            {
                TS_VALUE: 'sp. species',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'spp species subsp. subspecies',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'spp species subsp. subspecies var. variety',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'spp taxon',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'Candidaceae',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'sp sp species',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 'spp species abc. def',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_taxon(value, conf))
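A guess at the grammar these vectors encode: either a bare name ("Candidaceae") or rank/epithet pairs whose rank markers come from a fixed list (sp., spp, subsp., var.). The sketch below reproduces the vectors, but the marker list itself is an assumption:

ALLOWED_RANKS = {"sp.", "spp", "subsp.", "var."}  # assumed marker list


def is_valid_taxon_sketch(value, conf):
    # assumption: either a single bare name, or alternating rank/epithet
    # pairs such as "spp species subsp. subspecies var. variety"
    tokens = str(value).split()
    if len(tokens) == 1:
        return True
    if len(tokens) % 2 != 0:
        return False
    pairs = zip(tokens[0::2], tokens[1::2])
    return all(rank in ALLOWED_RANKS for rank, _epithet in pairs)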
    def test_is_valid_unique(self):
        tests = [
            {
                TS_VALUE: "abc",
                TS_CONF: {
                    TYPE: UNIQUE,
                    "label": "values",
                    "shown_values": {}
                },
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "jkl",
                TS_CONF: {
                    TYPE: UNIQUE,
                    "label": "values",
                    "shown_values": {
                        "values": {"abc": '',
                                   "def": '',
                                   "ghi": ''},
                    }
                },
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "abc",
                TS_CONF: {
                    TYPE: UNIQUE,
                    "label": "values",
                    "shown_values": {
                        "values": {"abc": '',
                                   "def": '',
                                   "ghi": ''},
                    }
                },
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_unique(value, conf))
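Uniqueness is checked against conf's shown_values registry, keyed by label: a value passes if it has not already been seen under that label. A sketch; the real function presumably also records the new value as a side effect, which the vectors do not show:

def is_valid_unique_sketch(value, conf):
    # assumption: shown_values maps each label to the values already seen;
    # a missing label means nothing has been seen yet
    seen = conf["shown_values"].get(conf["label"], {})
    return value not in seen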
    def test_is_valid_file(self):
        tests = [
            {
                TS_VALUE: TEST_DATA_DIR / "invalid_structure.mirri.xlsx",
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: TEST_DATA_DIR / "invalid_excel.mirri.json",
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_file(value))


if __name__ == "__main__":
    # import sys;sys.argv = ['',
    #             'ValidationFunctionsTest.test_is_valid_regex']
    unittest.main()
24
tests/test_writers.py
Normal file
24
tests/test_writers.py
Normal file
@ -0,0 +1,24 @@
import unittest
from pathlib import Path

from mirri.io.writers.mirri_excel import write_mirri_excel
from mirri.io.parsers.mirri_excel import parse_mirri_excel

TEST_DATA_DIR = Path(__file__).parent / "data"


class MirriExcelTests(unittest.TestCase):
    def test_valid_excel(self):
        in_path = TEST_DATA_DIR / "valid.mirri.full.xlsx"
        with in_path.open("rb") as fhand:  # close the handle instead of leaking it
            parsed_data = parse_mirri_excel(fhand, version="20200601")
        strains = parsed_data["strains"]
        growth_media = parsed_data["growth_media"]
        out_path = Path("/tmp/test.xlsx")

        write_mirri_excel(out_path, strains, growth_media, version="20200601")


if __name__ == "__main__":
    # import sys;sys.argv = ['',
    #             'BiolomicsWriter.test_mirri_excel_parser_invalid']
    unittest.main()
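The test above only checks that writing does not raise; a round-trip assertion would also catch silent data loss. A sketch, assuming parse_mirri_excel accepts the file just written, that strains is a list, and that strains compare meaningfully through their dict() form:

import tempfile
from pathlib import Path

from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.io.writers.mirri_excel import write_mirri_excel


def round_trip(strains, growth_media, version="20200601"):
    # write to a temporary workbook, parse it back, and compare the strains
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "round_trip.xlsx"
        write_mirri_excel(out_path, strains, growth_media, version=version)
        with out_path.open("rb") as fhand:
            reparsed = parse_mirri_excel(fhand, version=version)
    original = [strain.dict() for strain in strains]
    recovered = [strain.dict() for strain in reparsed["strains"]]
    assert original == recovered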