Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 31d6f0c8a2 | |||
| 22b14e4279 | |||
| 1fffc38b08 | |||
| b8b11fe690 | |||
| 2370686d72 | |||
| 37b2bbce98 | |||
| e26883e68e | |||
| f188754bc7 | |||
| 303a404fa0 | |||
| 322ed203d8 | |||
| e2278fd509 |
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
mirri/biolomics/secrets.py
|
||||
.vscode/launch.json
|
||||
*.pyc
|
||||
34
README.md
34
README.md
@ -1,34 +0,0 @@
|
||||
# MIRRI Utils
|
||||
|
||||
## Installation
|
||||
|
||||
> pip install path_to_package.tar.gz
|
||||
|
||||
|
||||
## Description
|
||||
|
||||
A small set of utilities to deal with Mirri Data.
|
||||
|
||||
- A data class to deal with strain data.
|
||||
|
||||
- An excel reader for mirri specification
|
||||
|
||||
- An excel validator for mirri specification
|
||||
|
||||
- An excel writer to create the excel with MIRRI specifications
|
||||
|
||||
|
||||
## Update 06-09-2022
|
||||
Under the bin directory:
|
||||
bin\
|
||||
upload_strains_to_mirri_is_NEWDB.py
|
||||
validateNEW.py
|
||||
|
||||
Those files were created to insert the data from the Excel files into the mirridb database.
|
||||
|
||||
validateNEW.py:
|
||||
>The purpose of this file is to orchestrate the validations and to call the upload to mirridb.
|
||||
|
||||
|
||||
upload_strains_to_mirri_is_NEWDB.py:
|
||||
This script inserts the Excel file into the database; the code has comments explaining each step.
|
||||
BIN
__pycache__/__init__.cpython-311.pyc
Normal file
BIN
__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
__pycache__/settings.cpython-311.pyc
Normal file
BIN
__pycache__/settings.cpython-311.pyc
Normal file
Binary file not shown.
BIN
__pycache__/settings_v1.cpython-311.pyc
Normal file
BIN
__pycache__/settings_v1.cpython-311.pyc
Normal file
Binary file not shown.
BIN
__pycache__/validate_v5.cpython-311.pyc
Normal file
BIN
__pycache__/validate_v5.cpython-311.pyc
Normal file
Binary file not shown.
@ -1,77 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
|
||||
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS, STRAIN_WS
|
||||
|
||||
SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
|
||||
|
||||
|
||||
def get_cmd_args():
    """Parse the command line of the duplicate-removal tool.

    Returns a dict with the accession number and the web-service
    credentials (user, password, client id and client secret).
    """
    parser = argparse.ArgumentParser(description="Upload strains to MIRRI-IS")
    parser.add_argument('-a', '--accession_number', required=True,
                        help='Delete the duplicated items in database for the given accession number')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')

    parsed = parser.parse_args()

    return {
        'accession_number': parsed.accession_number,
        'user': parsed.ws_user,
        'password': parsed.ws_password,
        'client_id': parsed.client_id,
        'client_secret': parsed.client_secret,
    }
|
||||
|
||||
|
||||
def write_errors_in_screen(errors, fhand=sys.stderr):
    """Pretty-print validation errors grouped under underlined headers.

    `errors` maps a group name to a list of error objects that expose
    `pk`, `message` and `code` attributes.
    """
    for group, group_errors in errors.items():
        fhand.write(f'{group}\n')
        fhand.write('-' * len(group) + '\n')
        for err in group_errors:
            prefix = f'{err.pk}: ' if err.pk else ''
            fhand.write(prefix)
            fhand.write(f'{err.message} - {err.code}\n')
        fhand.write('\n')
|
||||
|
||||
|
||||
def main():
    """Remove duplicated strain records for one accession number.

    Searches MIRRI-IS for the accession number given on the command
    line and, when more than one record matches, deletes every match
    except the last one returned.
    """
    args = get_cmd_args()
    out_fhand = sys.stdout

    client = BiolomicsMirriClient(server_url=SERVER_URL, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'])
    # Exact-match search on the accession number field.
    query = {"Query": [{"Index": 0,
                        "FieldName": "Collection accession number",
                        "Operation": "TextExactMatch",
                        "Value": args['accession_number']}],
             "Expression": "Q0",
             "DisplayStart": 0,
             "DisplayLength": 10}

    result = client.search(STRAIN_WS, query=query)
    total = result["total"]
    if total == 0:
        out_fhand.write('Accession not in database\n')
        # BUG FIX: an unreachable `return None` followed this exit call.
        sys.exit(0)
    elif total == 1:
        out_fhand.write('Accession is not duplicated\n')
        sys.exit(0)

    print(f'Duplicates found: {total}. removing duplicates')
    # Keep the last record returned; delete every other duplicate.
    duplicated_ids = [record.record_id for record in result['records']]
    for duplicated_id in duplicated_ids[:-1]:
        client.delete_by_id(STRAIN_WS, duplicated_id)


if __name__ == '__main__':
    main()
|
||||
@ -1,91 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from mirri.biolomics.pipelines.strain import retrieve_strain_by_accession_number
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
|
||||
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS, STRAIN_WS
|
||||
from mirri.io.parsers.mirri_excel import parse_mirri_excel
|
||||
from mirri.validation.excel_validator import validate_mirri_excel
|
||||
|
||||
SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
|
||||
|
||||
|
||||
def get_cmd_args():
    """Parse the command line of the strain-deletion tool.

    Returns a dict with the input file handle, spec version,
    web-service credentials and the force-update flag.
    """
    desc = "Upload strains to MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', help='Validated Excel file',
                        type=argparse.FileType('rb'), required=True)
    # Typo fixed in help text: "he specification" -> "the specification".
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('-f', '--force_update', required=False,
                        action='store_true',
                        help='Use it if you want to update the existing strains')

    args = parser.parse_args()

    return {'input_fhand': args.input, 'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret, 'update': args.force_update}
|
||||
|
||||
|
||||
def write_errors_in_screen(errors, fhand=sys.stderr):
    """Write validation errors to `fhand`, one underlined section per
    error group; each error line shows its pk (when set), message and code."""
    for key, errors_of_kind in errors.items():
        fhand.write(f'{key}\n')
        underline = '-' * len(key)
        fhand.write(underline + '\n')
        for error in errors_of_kind:
            if error.pk:
                fhand.write(f'{error.pk}: ')
            fhand.write(f'{error.message} - {error.code}\n')
        fhand.write('\n')
|
||||
|
||||
|
||||
def main():
    """Delete from MIRRI-IS every growth medium and strain that appears
    in a validated MIRRI excel file."""
    args = get_cmd_args()
    input_fhand = args['input_fhand']
    spec_version = args['version']
    out_fhand = sys.stderr

    # Abort before touching the server if the excel does not validate.
    error_log = validate_mirri_excel(input_fhand, version=spec_version)
    errors = error_log.get_errors()
    if errors:
        write_errors_in_screen(errors, out_fhand)
        sys.exit(1)

    input_fhand.seek(0)
    parsed_objects = parse_mirri_excel(input_fhand, version=spec_version)
    strains = list(parsed_objects['strains'])
    growth_media = list(parsed_objects['growth_media'])

    client = BiolomicsMirriClient(server_url=SERVER_URL, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'])

    # Growth media are deleted by acronym; failures are reported but do
    # not stop the run.
    for gm in growth_media:
        try:
            client.delete_by_name(GROWTH_MEDIUM_WS, gm.acronym)
        except ValueError as error:
            print(error)
            continue
        print(f'Growth medium {gm.acronym} deleted')

    # Strains are looked up by accession number before deleting.
    for strain in strains:
        ws_strain = retrieve_strain_by_accession_number(client, strain.id.strain_id)
        if ws_strain is None:
            print(f'Strain {strain.id.strain_id} not in database')
        else:
            client.delete_by_id(STRAIN_WS, ws_strain.record_id)
            print(f'Strain {strain.id.strain_id} deleted')


if __name__ == '__main__':
    main()
|
||||
@ -1,182 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import sys
|
||||
from collections import Counter
|
||||
|
||||
from mirri.biolomics.pipelines.growth_medium import get_or_create_or_update_growth_medium
|
||||
from mirri.biolomics.pipelines.strain import get_or_create_or_update_strain
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
|
||||
from mirri.io.parsers.mirri_excel import parse_mirri_excel
|
||||
from mirri.validation.excel_validator import validate_mirri_excel
|
||||
|
||||
TEST_SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
|
||||
PROD_SERVER_URL = 'https://webservices.bio-aware.com/mirri'
|
||||
|
||||
|
||||
def get_cmd_args():
    """Parse the command line of the upload tool.

    Returns a dict with the input file handle, spec version, credentials
    and the flags controlling what gets uploaded.
    """
    desc = "Upload strains to MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', help='Validated Excel file',
                        type=argparse.FileType('rb'), required=True)
    # Typo fixed in help text: "he specification" -> "the specification".
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('--force_update', required=False,
                        action='store_true',
                        help='Use it if you want to update the existing strains')
    parser.add_argument('--verbose', action='store_true',
                        help='use it if you want a verbose output')
    parser.add_argument('--prod', action='store_true',
                        help='Use production server')
    parser.add_argument('--dont_add_gm', action='store_false',
                        help="Don't add growth media", default=True)
    # Copy-paste bug fixed: this help text said "Don't add growth media".
    parser.add_argument('--dont_add_strains', action='store_false',
                        help="Don't add strains", default=True)
    parser.add_argument('--skip_first_num', type=int,
                        help='skip first X strains to the tool')

    args = parser.parse_args()

    return {'input_fhand': args.input, 'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret, 'update': args.force_update,
            'verbose': args.verbose, 'use_production_server': args.prod,
            'add_gm': args.dont_add_gm, 'add_strains': args.dont_add_strains,
            'skip_first_num': args.skip_first_num}
|
||||
|
||||
|
||||
def write_errors_in_screen(errors, fhand=sys.stderr):
    """Report validation errors grouped by key, each group underlined,
    one `pk: message - code` line per error."""
    for key, grouped in errors.items():
        fhand.write(f'{key}\n')
        fhand.write('-' * len(key) + '\n')
        for err in grouped:
            line = f'{err.message} - {err.code}\n'
            if err.pk:
                line = f'{err.pk}: ' + line
            fhand.write(line)
        fhand.write('\n')
|
||||
|
||||
|
||||
def create_or_upload_strains(client, strains, update=False, counter=None,
                             out_fhand=None, seek=None):
    """Create (or optionally update) each strain in MIRRI-IS.

    Skips the first `seek` strains when given (to resume a partial
    upload), tallies outcomes in `counter` and logs one line per strain
    to `out_fhand` when provided.
    """
    for index, strain in enumerate(strains):
        if seek is not None and index < seek:
            continue
        result = get_or_create_or_update_strain(client, strain, update=update)

        new_strain = result['record']
        if result.get('updated', False):
            result_state = 'updated'
        elif result['created']:
            result_state = 'created'
        else:
            result_state = 'not modified'
        if counter is not None:
            counter[result_state] += 1
        if out_fhand is not None:
            out_fhand.write(f'{index}: Strain {new_strain.id.strain_id}: {result_state}\n')
|
||||
|
||||
|
||||
def create_or_upload_growth_media(client, growth_media, update=False, counter=None,
                                  out_fhand=None):
    """Create (or optionally update) each growth medium in MIRRI-IS,
    tallying outcomes in `counter` and logging to `out_fhand` when given."""
    for gm in growth_media:
        result = get_or_create_or_update_growth_medium(client, gm, update)

        new_gm = result['record']
        if result.get('updated', False):
            result_state = 'updated'
        elif result['created']:
            result_state = 'created'
        else:
            result_state = 'not modified'
        if counter is not None:
            counter[result_state] += 1
        if out_fhand is not None:
            out_fhand.write(f'Growth medium {new_gm.record_name}: {result_state}\n')
|
||||
|
||||
|
||||
def main():
    """Validate a MIRRI excel file and upload its growth media and
    strains to MIRRI-IS, each kind inside its own transaction."""
    args = get_cmd_args()
    input_fhand = args['input_fhand']
    spec_version = args['version']
    out_fhand = sys.stdout
    error_log = validate_mirri_excel(input_fhand, version=spec_version)
    errors = error_log.get_errors()
    skip_first_num = args['skip_first_num']
    if errors:
        write_errors_in_screen(errors, out_fhand)
        sys.exit(1)

    input_fhand.seek(0)
    parsed_objects = parse_mirri_excel(input_fhand, version=spec_version)
    strains = list(parsed_objects['strains'])
    growth_media = list(parsed_objects['growth_media'])

    server_url = PROD_SERVER_URL if args['use_production_server'] else TEST_SERVER_URL

    client = BiolomicsMirriClient(server_url=server_url, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'],
                                  verbose=args['verbose'])

    if args['add_gm']:
        client.start_transaction()
        counter = Counter()
        try:
            create_or_upload_growth_media(client, growth_media, update=args['update'],
                                          counter=counter, out_fhand=out_fhand)
        except (Exception, KeyboardInterrupt) as error:
            out_fhand.write('There were some errors in the Growth media upload\n')
            out_fhand.write(str(error) + '\n')
            out_fhand.write('Rolling back\n')
            client.rollback()
            raise
        client.finish_transaction()
        show_stats(counter, 'Growth Media', out_fhand)

    if args['add_strains']:
        client.start_transaction()
        counter = Counter()
        try:
            create_or_upload_strains(client, strains, update=args['update'],
                                     counter=counter,
                                     out_fhand=out_fhand, seek=skip_first_num)
        except (Exception, KeyboardInterrupt) as error:
            out_fhand.write('There were some errors in the Strain upload\n')
            out_fhand.write(str(error) + '\n')
            out_fhand.write('rolling back\n')
            # NOTE(review): rollback was commented out in the original;
            # left disabled to preserve behavior -- confirm intent.
            # client.rollback()
            raise
        # BUG FIX: finish_transaction() was called both inside the try
        # and here, finishing the transaction twice on success; it is
        # now called exactly once.
        client.finish_transaction()
        show_stats(counter, 'Strains', out_fhand)
|
||||
|
||||
|
||||
def show_stats(counter, kind, out_fhand):
    """Write a short report with the per-state counts for `kind`,
    showing at most the five most common states.
    """
    out_fhand.write(f'{kind}\n')
    # Idiom fix: '-' * n instead of joining a list of n dashes.
    line = '-' * len(kind)
    out_fhand.write(f"{line}\n")
    for state, value in counter.most_common(5):
        out_fhand.write(f'{state}: {value}\n')
    out_fhand.write('\n')


if __name__ == '__main__':
    main()
|
||||
@ -1,224 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
from cmath import nan
|
||||
import sys
|
||||
from collections import Counter
|
||||
|
||||
#
|
||||
from mirri.biolomics.pipelines.growth_medium import get_or_create_or_update_growth_medium
|
||||
from mirri.biolomics.pipelines.strain import get_or_create_or_update_strain
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
|
||||
from mirri.io.parsers.mirri_excel import parse_mirri_excel
|
||||
from mirri.validation.excel_validator import validate_mirri_excel
|
||||
|
||||
##Database
|
||||
from sqlalchemy import create_engine, MetaData
|
||||
import pymysql
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
|
||||
# DEFINE THE DATABASE CREDENTIALS
# SECURITY WARNING (review): these MySQL credentials are hard-coded in
# source control. They should be moved to environment variables or a
# secrets file (cf. mirri/biolomics/secrets.py listed in .gitignore),
# and the exposed password should be rotated.
user = 'mirridev'
password = 'estramboticandolotodo'
host = 'mirri-is.mirri.org'
port = 33066
database = 'mirri-db'

# Biolomics web-service endpoints (test vs. production).
TEST_SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
PROD_SERVER_URL = 'https://webservices.bio-aware.com/mirri'
|
||||
|
||||
def show_stats(counter, kind, out_fhand):
    """Report the (at most five) most common outcome states for `kind`."""
    underline = ''.join(['-'] * len(kind))
    out_fhand.write(f'{kind}\n')
    out_fhand.write(f"{underline}\n")
    for state, count in counter.most_common(5):
        out_fhand.write(f'{state}: {count}\n')
    out_fhand.write('\n')
|
||||
|
||||
|
||||
def get_cmd_args():
    """Parse the command line of the NEWDB upload tool.

    Returns a dict with the input file handle, spec version, credentials
    and the flags controlling what gets uploaded. Note: `--ws_user` is
    optional here, so 'user' may be None.
    """
    desc = "Upload strains to MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', required=True, help='Validated Excel file',
                        type=argparse.FileType('rb'))
    # Typo fixed in help text: "he specification" -> "the specification".
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service')
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('--force_update', required=False, action='store_true',
                        help='Use it if you want to update the existing strains')
    parser.add_argument('--verbose', action='store_true',
                        help='use it if you want a verbose output')
    parser.add_argument('--prod', action='store_true',
                        help='Use production server')
    parser.add_argument('--dont_add_gm', default=True, action='store_false',
                        help="Don't add growth media")
    # Copy-paste bug fixed: this help text said "Don't add growth media".
    parser.add_argument('--dont_add_strains', default=True, action='store_false',
                        help="Don't add strains")
    parser.add_argument('--skip_first_num', type=int,
                        help='skip first X strains to the tool')

    args = parser.parse_args()

    return {'input_fhand': args.input,
            'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password,
            'client_id': args.client_id,
            'client_secret': args.client_secret,
            'update': args.force_update,
            'verbose': args.verbose,
            'use_production_server': args.prod,
            'add_gm': args.dont_add_gm,
            'add_strains': args.dont_add_strains,
            'skip_first_num': args.skip_first_num,
            }
|
||||
|
||||
|
||||
def write_errors_in_screen(errors, fhand=sys.stderr):
    """Dump validation errors to `fhand`: an underlined header per group
    followed by one `pk: message - code` line per error."""
    for heading, err_list in errors.items():
        fhand.write(f'{heading}\n')
        fhand.write('-' * len(heading) + '\n')
        for err in err_list:
            if err.pk:
                fhand.write(f'{err.pk}: ')
            fhand.write(f'{err.message} - {err.code}\n')
        fhand.write('\n')
|
||||
|
||||
|
||||
def create_or_upload_strains(client, strains, update=False, counter=None, out_fhand=None, seek=None):
    """Create (or optionally update) each strain in MIRRI-IS, skipping
    the first `seek` entries when given; outcomes are tallied in
    `counter` and logged to `out_fhand` when those are provided."""
    for index, strain in enumerate(strains):
        if seek is not None and index < seek:
            continue

        result = get_or_create_or_update_strain(client, strain, update=update)
        new_strain = result['record']
        updated = result.get('updated', False)
        created = result['created']
        result_state = 'updated' if updated else ('created' if created else 'not modified')

        if counter is not None:
            counter[result_state] += 1
        if out_fhand is not None:
            out_fhand.write(f'{index}: Strain {new_strain.id.strain_id}: {result_state}\n')
|
||||
|
||||
|
||||
def create_or_upload_growth_media(client, growth_media, update=False, counter=None, out_fhand=None):
    """Create (or optionally update) each growth medium in MIRRI-IS,
    tallying outcomes in `counter` and logging to `out_fhand` when given."""
    for medium in growth_media:
        result = get_or_create_or_update_growth_medium(client, medium, update)
        record = result['record']
        updated = result.get('updated', False)
        created = result['created']
        result_state = 'updated' if updated else ('created' if created else 'not modified')

        if counter is not None:
            counter[result_state] += 1
        if out_fhand is not None:
            out_fhand.write(f'Growth medium {record.record_name}: {result_state}\n')
|
||||
|
||||
|
||||
def get_connection():
    """Return a SQLAlchemy engine for the mirri MySQL database.

    Connection details come from the module-level constants defined
    above (user, password, host, port, database).
    """
    url = f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}"
    return create_engine(url=url)
|
||||
|
||||
def main():
    """Load every sheet of a MIRRI excel file into the mirridb MySQL
    database, registering the upload in the `file_upload` table and
    tagging every inserted row with the upload's primary key.
    """
    # Load the whole workbook (sheet_name=None -> dict of DataFrames).
    # NOTE(review): the path and collection id are hard-coded; consider
    # taking them from the command line.
    path = Path('C://data//brclims_excel.xlsx')
    excel_data = pd.read_excel(path, sheet_name=None)
    cc_id = 1

    # BUG FIX: the original built an Engine, opened a Connection from it
    # and then called .connect() on that Connection again (deprecated
    # "branched connection" pattern). We now use the Engine directly.
    engine = get_connection()

    # Register this upload and keep its primary key.
    with engine.connect() as conn:
        metadata = MetaData(conn, schema=database)
        metadata.reflect(bind=conn)
        table = metadata.tables['mirri-db.file_upload']
        stmt = table.insert().values(filename=path.name, cc_id=cc_id)
        upload_id = conn.execute(stmt).inserted_primary_key[0]

    # Load each sheet into a table of the same name, stringified and
    # with NaNs blanked, columns snake-ified, rows tagged with f_id.
    for sheet_name in excel_data.keys():
        frame = excel_data[sheet_name].replace(np.nan, '', regex=True).astype(str)
        frame.columns = frame.columns.str.replace(' ', '_')
        frame['f_id'] = upload_id
        frame.to_sql(sheet_name, engine, index=False, if_exists='append')

    # NOTE: a large commented-out (dead) web-service upload block was
    # removed here; that flow lives in upload_strains_to_mirri_is.py.


if __name__ == '__main__':
    main()
|
||||
@ -1,21 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from mirri.validation.excel_validator import validate_mirri_excel
|
||||
import warnings
|
||||
warnings.simplefilter("ignore")
|
||||
|
||||
|
||||
def main():
    """Validate a MIRRI excel file and print every validation error.

    The file path is taken from the first command line argument when
    given; otherwise the historical hard-coded path is used, so existing
    no-argument invocations behave exactly as before.
    """
    # Generalization: honour argv[1] (this line existed but was
    # commented out in the original).
    if len(sys.argv) > 1:
        path = Path(sys.argv[1])
    else:
        path = Path('C:/data/brclims_excel.xlsx')

    error_log = validate_mirri_excel(path.open("rb"))

    for errors in error_log.get_errors().values():
        for error in errors:
            print(error.pk, error.message, error.code)


if __name__ == "__main__":
    main()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
io/parsers/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
io/parsers/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
io/parsers/__pycache__/excel.cpython-311.pyc
Normal file
BIN
io/parsers/__pycache__/excel.cpython-311.pyc
Normal file
Binary file not shown.
@ -37,14 +37,14 @@ TRUEFALSE_TRANSLATOR = {
|
||||
}
|
||||
|
||||
|
||||
def parse_mirri_excel(fhand, version="20200601"):
|
||||
if version == "20200601":
|
||||
return _parse_mirri_v20200601(fhand)
|
||||
def parse_mirri_excel(fhand, version=""):
|
||||
if version == "5.1.2":
|
||||
return _parse_mirri_v12052023(fhand)
|
||||
else:
|
||||
raise NotImplementedError("Only version 20200601 is implemented")
|
||||
raise NotImplementedError("Only version is 5.1.2 implemented")
|
||||
|
||||
|
||||
def _parse_mirri_v20200601(fhand):
|
||||
def _parse_mirri_v12052023(fhand):
|
||||
fhand.seek(0)
|
||||
file_content = BytesIO(fhand.read())
|
||||
wb = load_workbook(filename=file_content, read_only=True, data_only=True)
|
||||
@ -64,7 +64,6 @@ def _parse_mirri_v20200601(fhand):
|
||||
|
||||
return {"strains": strains, "growth_media": growth_media}
|
||||
|
||||
|
||||
def index_list_by(list_, id_):
|
||||
return {str(item[id_]): item for item in list_}
|
||||
|
||||
@ -125,7 +124,7 @@ def parse_strains(wb, locations, growth_media, markers, publications,
|
||||
publications = index_list_by_attr(publications, 'id')
|
||||
markers = index_markers(markers)
|
||||
|
||||
for strain_row in workbook_sheet_reader(wb, STRAINS, "Accession number"):
|
||||
for strain_row in workbook_sheet_reader(wb, STRAINS, "accessionNumber"):
|
||||
strain = StrainMirri()
|
||||
strain_id = None
|
||||
label = None
|
||||
@ -140,7 +139,7 @@ def parse_strains(wb, locations, growth_media, markers, publications,
|
||||
collection, number = value.split(" ", 1)
|
||||
value = StrainId(collection=collection, number=number)
|
||||
rsetattr(strain, attribute, value)
|
||||
|
||||
|
||||
elif attribute == "restriction_on_use":
|
||||
rsetattr(strain, attribute, RESTRICTION_USE_TRANSLATOR[value])
|
||||
elif attribute == "nagoya_protocol":
|
||||
@ -202,9 +201,19 @@ def parse_strains(wb, locations, growth_media, markers, publications,
|
||||
items = value.split(";")
|
||||
strain.collect.location.latitude = float(items[0])
|
||||
strain.collect.location.longitude = float(items[1])
|
||||
strain.collect.location.precision = float(items[2])
|
||||
strain.collect.location.altitude = float(items[3])
|
||||
if len(items) > 4:
|
||||
strain.collect.location.coord_uncertainty = items[4]
|
||||
|
||||
elif attribute == "collect.site.links":
|
||||
items = value.split(";")
|
||||
strain.collect.site.links.nameSite = str(items[0])
|
||||
strain.collect.site.links.urlSite = str(items[1])
|
||||
rsetattr(strain, attribute, value.split(";")) #ver o separador
|
||||
if len(items) > 2:
|
||||
strain.collect.location.coord_uncertainty = items[2]
|
||||
|
||||
strain.collect.site.links.site_uncertainty = items[2]
|
||||
|
||||
elif attribute == "collect.location":
|
||||
location = locations[value]
|
||||
if 'Country' in location and location['Country']:
|
||||
@ -50,11 +50,10 @@ PUB_HEADERS = [pb["label"] for pb in PUBLICATION_FIELDS]
|
||||
|
||||
|
||||
def write_mirri_excel(path, strains, growth_media, version):
|
||||
if version == "20200601":
|
||||
_write_mirri_excel_20200601(path, strains, growth_media)
|
||||
if version == "5.1.2":
|
||||
_write_mirri_excel_12052023(path, strains, growth_media)
|
||||
|
||||
|
||||
def _write_mirri_excel_20200601(path, strains, growth_media):
|
||||
def _write_mirri_excel_12052023(path, strains, growth_media):
|
||||
wb = Workbook()
|
||||
|
||||
write_markers_sheet(wb)
|
||||
@ -104,7 +103,7 @@ def _write_mirri_excel_20200601(path, strains, growth_media):
|
||||
redimension_cell_width(pub_sheet)
|
||||
|
||||
# write sexual states
|
||||
sex_sheet = wb.create_sheet("Sexual states")
|
||||
sex_sheet = wb.create_sheet("Sexual state")
|
||||
for sex_state in sorted(list(sexual_states)):
|
||||
sex_sheet.append([sex_state])
|
||||
redimension_cell_width(sex_sheet)
|
||||
@ -121,7 +120,6 @@ def _write_mirri_excel_20200601(path, strains, growth_media):
|
||||
del wb["Sheet"]
|
||||
wb.save(str(path))
|
||||
|
||||
|
||||
def _deserialize_strains(strains, locations, growth_media_indexes,
|
||||
publications, sexual_states, genomic_markers):
|
||||
for strain in strains:
|
||||
@ -189,10 +187,21 @@ def _deserialize_strains(strains, locations, growth_media_indexes,
|
||||
elif attribute == "collect.location.coords":
|
||||
lat = strain.collect.location.latitude
|
||||
long = strain.collect.location.longitude
|
||||
if lat is not None and long is not None:
|
||||
value = f"{lat};{long}"
|
||||
alt = strain.collect.location.altitude
|
||||
prec = strain.collect.location.precision
|
||||
if lat is not None and long is not None and prec is not None and alt is not None:
|
||||
value = f"{lat};{long};{prec};{alt}"
|
||||
else:
|
||||
value = None
|
||||
value = None
|
||||
elif attribute == "collect.site.links":
|
||||
name = strain.collect.site.links.nameSite
|
||||
url = strain.collect.site.links.urlSite
|
||||
value = rgetattr(strain, attribute)
|
||||
value = ";".join(value)
|
||||
if name is not None and url is not None:
|
||||
value = f"{name};{url}"
|
||||
else:
|
||||
value = None
|
||||
|
||||
elif attribute == "collect.location":
|
||||
location = strain.collect.location
|
||||
@ -1,50 +0,0 @@
|
||||
from mirri import rgetattr
|
||||
|
||||
|
||||
def validate_strain(strain, version='20200601'):
    """Validate `strain` against the given MIRRI spec version.

    Only the 20200601 specification is supported; any other version
    raises NotImplementedError.
    """
    if version != '20200601':
        raise NotImplementedError('Only v20200601 is implemented')
    return _validate_strain_v20200601(strain)
|
||||
|
||||
|
||||
def _validate_strain_v20200601(strain):
    """Collect human-readable error strings for a strain (v20200601).

    Checks that every mandatory attribute is set and that the strain is
    compliant with the Nagoya protocol requirements.
    """
    mandatory_attrs = [{'label': 'Accession Number', 'attr': 'id.strain_id'},
                       {'label': 'Nagoya protocol', 'attr': 'nagoya_protocol'},
                       {'label': 'Restriction on use', 'attr': 'restriction_on_use'},
                       {'label': 'Risk group', 'attr': 'risk_group'},
                       {'label': 'Organism type', 'attr': 'taxonomy.organism_type'},
                       {'label': 'Taxon name', 'attr': 'taxonomy.long_name'},
                       {'label': 'Recommended temperature to growth', 'attr': 'growth.recommended_temp'},
                       {'label': 'Recommended media', 'attr': 'growth.recommended_media'},
                       {'label': 'Form of supply', 'attr': 'form_of_supply'},
                       {'label': 'Country', 'attr': 'collect.location.country'}]

    # One error per missing mandatory attribute.
    errors = [f"{item['label']} is mandatory field"
              for item in mandatory_attrs
              if rgetattr(strain, item['attr']) is None]

    if not is_valid_nagoya(strain):
        errors.append('Not compliant wih nagoya protocol requirements')

    return errors
|
||||
|
||||
|
||||
def is_valid_nagoya(strain):
    """Check the Nagoya-protocol requirement on a strain.

    A strain whose relevant date is 2014 or later must carry a collection
    country.  The relevant date is the first available of: collection,
    isolation, deposit, catalogue-inclusion date.
    """
    # Walk the candidate dates in priority order until one is set.
    when = strain.collect.date
    if when is None:
        when = strain.isolation.date
    if when is None:
        when = strain.deposit.date
    if when is None:
        when = strain.catalog_inclusion_date

    # NOTE(review): this reads the private `_year` attribute of the project's
    # date entity — confirm there is no public accessor for the year.
    year = None if when is None else when._year

    if year is not None and year >= 2014 and strain.collect.location.country is None:
        return False
    return True
|
||||
@ -1,414 +0,0 @@
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class ErrorMessage:
    """A validation error identified by an upper-case code.

    The human-readable text for each code is implemented as an upper-case
    method of this class (e.g. ``EXL00``); the ``message`` property
    dispatches on ``self.code`` to build the final string.

    Args:
        code (str): Error code (case-insensitive; stored upper-cased).
        pk (str, optional): Primary key of the instance that triggered the
            error. Defaults to None.
        value (str, optional): The instance's value that triggered the
            error. Defaults to None.
    """

    def __init__(self, code: str, pk: Optional[str] = None, value: Optional[str] = None):
        # BUG FIX: the original upper-cased here *and* in the setter; the
        # setter alone is enough and keeps normalisation in one place.
        self.code = code
        self.pk = pk
        self.value = value

    @property
    def _codes(self) -> list:
        # By convention every public, upper-case, callable attribute of this
        # class is an error code.
        return [
            func
            for func in dir(self)
            if func.isupper() and
            callable(getattr(self, func)) and
            not func.startswith("__")
        ]

    @property
    def _messages(self) -> dict:
        # Map each known code to its (bound) message-building method.
        return {code: getattr(self, code) for code in self._codes}

    @property
    def message(self) -> str:
        """Build the message for ``self.code``.

        Raises:
            ValueError: if the code does not match any known error method.
        """
        if not self._validate_code():
            raise ValueError(f"{self.code} not found")
        return self._messages[self.code]()

    @property
    def code(self) -> str:
        return self._code

    @code.setter
    def code(self, code: str) -> None:
        # Codes are normalised to upper case so lookups are case-insensitive.
        self._code = code.upper()

    def _validate_code(self) -> bool:
        return self.code in self._codes

    @property
    def pk(self) -> str:
        return self._pk

    @pk.setter
    def pk(self, pk: str) -> None:
        self._pk = pk

    @property
    def value(self) -> str:
        return self._value

    @value.setter
    def value(self, value: str) -> None:
        self._value = value

    # ------------------------------------------------------------------
    # Excel file structure error codes
    # ------------------------------------------------------------------

    def EXL00(self):
        return f"The provided file '{self.pk}' is not an excel(xlsx) file"

    def EFS01(self):
        return "The 'Growth media' sheet is missing. Please check the provided excel template."

    def EFS02(self):
        return "The 'Geographic origin' sheet is missing. Please check the provided excel template."

    def EFS03(self):
        return "The 'Literature' sheet is missing. Please check the provided excel template."

    def EFS04(self):
        return "The 'Sexual state' sheet is missing. Please check the provided excel template."

    def EFS05(self):
        return "The 'Strains' sheet is missing. Please check the provided excel template."

    def EFS06(self):
        return "The 'Ontobiotope' sheet is missing. Please check the provided excel template."

    def EFS07(self):
        return "The 'Markers' sheet is missing. Please check the provided excel template."

    def EFS08(self):
        return "The 'Genomic information' sheet is missing. Please check the provided excel template."

    # ------------------------------------------------------------------
    # Growth media error codes
    # ------------------------------------------------------------------

    def GMD01(self):
        return "The 'Acronym' column is a mandatory field in the Growth Media sheet."

    def GMD02(self):
        return "The 'Acronym' column is empty or has missing values."

    def GMD03(self):
        return "The 'Description' column is a mandatory field in the Growth Media sheet. The column can not be empty."

    def GMD04(self):
        return f"The 'Description' for growth media with Acronym {self.pk} is missing."

    # ------------------------------------------------------------------
    # Geographic origin error codes
    # ------------------------------------------------------------------

    def GOD01(self):
        return "The 'ID' column is a mandatory field in the Geographic Origin sheet."

    def GOD02(self):
        return "The 'ID' column is empty or has missing values."

    def GOD03(self):
        return "The 'Country' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."

    def GOD04(self):
        return f"The 'Country' for geographic origin with ID {self.pk} is missing."

    def GOD05(self):
        return f"The 'Country' for geographic origin with ID {self.pk} is incorrect."

    def GOD06(self):
        # FIX: dropped a stray f-prefix (no placeholders); text unchanged.
        return "The 'Locality' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."

    def GOD07(self):
        return f"The 'Locality' for geographic origin with ID {self.pk} is missing."

    # ------------------------------------------------------------------
    # Literature error codes
    # ------------------------------------------------------------------

    def LID01(self):
        return "The 'ID' column is a mandatory field in the Literature sheet."

    def LID02(self):
        return "The 'ID' column empty or missing values."

    def LID03(self):
        return "The 'Full reference' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID04(self):
        return f"The 'Full reference' for literature with ID {self.pk} is missing."

    def LID05(self):
        return "The 'Authors' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID06(self):
        return f"The 'Authors' for literature with ID {self.pk} is missing."

    def LID07(self):
        return "The 'Title' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID08(self):
        return f"The 'Title' for literature with ID {self.pk} is missing."

    def LID09(self):
        return "The 'Journal' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID10(self):
        return f"The 'Journal' for literature with ID {self.pk} is missing."

    def LID11(self):
        return "The 'Year' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID12(self):
        # BUG FIX: the original signature was ``LID12(self,)`` — a stray
        # trailing comma left over from an edit.
        return f"The 'Year' for literature with ID {self.pk} is missing."

    def LID13(self):
        return "The 'Volume' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID14(self):
        return f"The 'Volume' for literature with ID {self.pk} is missing."

    def LID15(self):
        return "The 'First page' column is a mandatory field. The column can not be empty."

    def LID16(self):
        return f"The 'First page' for literature with ID {self.pk} is missing."

    def LID17(self):
        msg = 'If journal; Title, Authors, journal, year and first page are required'
        msg += 'If Book; Book Title, Authors, Year, Editors, Publishers'
        return msg

    # ------------------------------------------------------------------
    # Strains error codes
    # ------------------------------------------------------------------

    def STD01(self):
        return "The 'Accession number' column is a mandatory field in the Strains sheet."

    def STD02(self):
        return "The 'Accession number' column is empty or has missing values."

    def STD03(self):
        return f"The 'Accesion number' must be unique. The '{self.value}' is repeated."

    def STD04(self):
        return (f"The 'Accession number' {self.pk} is not according to the specification."
                " The value must be of the format '<Sequence of characters> <sequence of characters>'.")

    def STD05(self):
        return "The 'Restriction on use' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD06(self):
        return f"The 'Restriction on use' for strain with Accession Number {self.pk} is missing."

    def STD07(self):
        return (f"The 'Restriction on use' for strain with Accession Number {self.pk} is not according to the specification."
                f" Your value is {self.value} and the accepted values are 1, 2, 3.")

    def STD08(self):
        return "The 'Nagoya protocol restrictions and compliance conditions' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD09(self):
        return f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is missing."

    def STD10(self):
        return (f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is not according to the specification."
                f" Your value is {self.value} and the accepted values are 1, 2, 3.")

    def STD11(self):
        return (f"The 'Strain from a registered collection' for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2, 3.")

    def STD12(self):
        return "The 'Risk group' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD13(self):
        return f"The 'Risk group' for strain with Accession Number {self.pk} is missing."

    def STD14(self):
        return (f"The 'Risk group' for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2, 3, 4.")

    def STD15(self):
        return (f"The 'Dual use' for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2.")

    def STD16(self):
        # NOTE: the curly quotes around “Quarantine in europe” are preserved
        # on purpose — downstream code may match this exact text.
        return (f"The “Quarantine in europe” for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2.")

    def STD17(self):
        return "The 'Organism type' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD18(self):
        return f"The 'Organism type' for strain with Accession Number {self.pk} is missing."

    def STD19(self):
        return (f"The 'Organism type' for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 'Algae', 'Archaea', 'Bacteria', 'Cyanobacteria', "
                "'Filamentous Fungi', 'Phage', 'Plasmid', 'Virus', 'Yeast', 1, 2, 3, 4, 5, 6, 7, 8, 9.")

    def STD20(self):
        return "The 'Taxon name' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD21(self):
        return f"The 'Taxon name' for strain with Accession Number {self.pk} is missing."

    def STD22(self):
        return f"The 'Taxon name' for strain with Accession Number {self.pk} is incorrect."

    def STD23(self):
        return (f"The 'Interspecific hybrid' for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2.")

    def STD24(self):
        return f"The 'History of deposit' for strain with Accession Number {self.pk} is incorrect."

    def STD25(self):
        return (f"The 'Date of deposit' for strain with Accession Number {self.pk} is incorrect."
                " The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")

    def STD26(self):
        return (f"The 'Date of inclusion in the catalogue' for strain with Accession Number {self.pk} is incorrect."
                " The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")

    def STD27(self):
        return (f"The 'Date of collection' for strain with Accession Number {self.pk} is incorrect."
                " The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")

    def STD28(self):
        return (f"The 'Date of isolation' for strain with Accession Number {self.pk} is incorrect."
                " The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")

    def STD29(self):
        return (f"The 'Tested temperature growth range' for strain with Accession Number {self.pk} is incorrect."
                " It must have two decimal numbers separated by ','")

    def STD30(self):
        return "The 'Recommended growth temperature' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD31(self):
        return f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is missing."

    def STD32(self):
        return (f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is incorrect."
                " It must have two decimal numbers separated by ','.")

    def STD33(self):
        return "The 'Recommended medium for growth' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD34(self):
        return f"The 'Recommended medium for growth' for strain with Accession Number {self.pk} is missing."

    def STD35(self):
        return f"The value of 'Recommended medium for growth' for strain with Accession Number {self.pk} is not in the Growth Media Sheet."

    def STD36(self):
        return "The 'Forms of supply' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD37(self):
        return f"The 'Forms of supply' for strain with Accession Number {self.pk} is missing."

    def STD38(self):
        return f"The value of 'Forms of supply' for strain with Accession Number {self.pk} is not in the Forms of Supply Sheet."

    def STD39(self):
        return (f"The 'Coordinates of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
                "The allowed formats are two or three decimal numbers separated by ','. Moreover, the first number must be"
                "between [-90, 90], the second between [-180, 180], and the third, if provided, can assume any value.")

    def STD40(self):
        return (f"The 'Altitude of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
                "The allowed formats are one decimal number between [-200, 8000].")

    def STD41(self):
        return f"The value of 'Ontobiotope term for the isolation habitat' for strain with Accession Number {self.pk} is not in the Ontobiotope Sheet."

    def STD42(self):
        return (f"The 'GMO' for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2")

    def STD43(self):
        return (f"The 'Sexual State' for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 'Mata', 'Matalpha', 'Mata/Matalpha', "
                "'Matb', 'Mata/Matb', 'MTLa', 'MTLalpha', 'MTLa/MTLalpha', 'MAT1-1', 'MAT1-2', 'MAT1', 'MAT2', 'MT+', 'MT-'")

    def STD44(self):
        return (f"The 'Ploidy' for strain with Accession Number {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 0, 1, 2, 3, 4, 9")

    def STD45(self):
        msg = f"At least one of the values '{self.value}' of the literature field for strain {self.pk} are not in the literature sheet. "
        msg += "If the those values are Pubmed ids or DOIs, please ignore this messsage"
        return msg

    def STD46(self):
        msg = "If date of collection/isolation/deposit/inclusion in the catalog is after 2014," \
              " the value of column Geographic Origin must be provided and associated with a country in the " \
              f"Geographic Origin sheet. The value is missing or not associated with a country for strain {self.pk}."
        return msg

    # ------------------------------------------------------------------
    # Genomic information error codes
    # ------------------------------------------------------------------

    def GID01(self):
        return "The 'Strain Acession Number' (Strain AN) column is a mandatory field in the Genomic Information Sheet."

    def GID02(self):
        return "The 'Strain Acession Number' (Strain AN) column is empty or has missing values."

    def GID03(self):
        return f"The value of 'Strain Acession Number' (Strain AN) {self.value} is not in the Strains sheet."

    def GID04(self):
        return "The 'Marker' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."

    def GID05(self):
        return f"The 'Marker' for genomic information with Strain AN {self.pk} is missing."

    def GID06(self):
        return f"The value of 'Marker' {self.value} is not in the Markers sheet."

    def GID07(self):
        return "The 'INSDC AN' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."

    def GID08(self):
        return f"The 'INSDC AN' for genomic information with Strain AN {self.pk} is missing."

    def GID09(self):
        return f"The 'INSDC AN' for genomic information with Strain AN {self.pk} is incorrect."

    def GID10(self):
        return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect."
                " It must be a sequence of 'G', 'T', 'A', 'C' characteres of any length and without white spaces.")

    # ------------------------------------------------------------------
    # Ontobiotope error codes
    # ------------------------------------------------------------------

    def OTD01(self):
        return "The 'ID' columns is a mandatory field in the Ontobiotope Sheet."

    def OTD02(self):
        return "The 'ID' columns is empty or has missing values."

    def OTD03(self):
        return "The 'Name' columns is a mandatory field in the Ontobiotope Sheet. The column can not be empty."

    def OTD04(self):
        return f"The 'Name' for ontobiotope with ID {self.pk} is missing."
|
||||
@ -1,5 +0,0 @@
|
||||
openpyxl
|
||||
requests
|
||||
requests_oauthlib
|
||||
pycountry
|
||||
deepdiff
|
||||
@ -3,6 +3,7 @@ from pathlib import Path
|
||||
DATA_DIR = Path(__file__).parent / "data"
|
||||
|
||||
ACCESSION_NUMBER = "accession_number"
|
||||
MIRRI_ACCESSION_NUMBER = 'mirri_accession_number'
|
||||
RESTRICTION_ON_USE = "restriction_on_use"
|
||||
NAGOYA_PROTOCOL = "nagoya_protocol"
|
||||
ABS_RELATED_FILES = "abs_related_files"
|
||||
@ -14,6 +15,7 @@ DUAL_USE = "dual_use"
|
||||
QUARANTINE = "quarantine"
|
||||
ORGANISM_TYPE = "organism_type"
|
||||
TAXON_NAME = "taxon_name"
|
||||
TYPE = "type"
|
||||
INFRASUBSPECIFIC_NAME = "infrasubspecific_names"
|
||||
COMMENTS_ON_TAXONOMY = "comments_on_taxonomy"
|
||||
STATUS = "status"
|
||||
@ -54,6 +56,9 @@ SUBSTRATE_HOST_OF_ISOLATION = "substrate_host_of_isolation"
|
||||
ISOLATION_HABITAT = "isolation_habitat"
|
||||
ONTOBIOTOPE_ISOLATION_HABITAT = "ontobiotope_term_for_the_isolation_habitat"
|
||||
LITERATURE_LINKED_TO_SEQ_GENOME = "literature_linked_to_the_sequence_genome"
|
||||
AXENIC_CULTURE = "axenic_culture"
|
||||
QPS ="qps"
|
||||
SITE_LINK = "site_links"
|
||||
|
||||
# StrainId
|
||||
STRAIN_ID = "id"
|
||||
@ -99,73 +104,80 @@ ALLOWED_COLLECTING_SITE_KEYS = [
|
||||
]
|
||||
|
||||
MIRRI_FIELDS = [
|
||||
{"attribute": "id", "label": "Accession number"},
|
||||
{"attribute": "restriction_on_use", "label": "Restrictions on use"},
|
||||
{"attribute": "id", "label": "accessionNumber"},
|
||||
{"attribute": "mirri_accession_number", "label": "mirriAccessionNumber"},
|
||||
{"attribute": "qps", "label": "qps"},
|
||||
{"attribute": "axenic_culture", "label": "axenicCulture"},
|
||||
{"attribute": "restriction_on_use", "label": "useRestrictions"},
|
||||
{"attribute": "nagoya_protocol",
|
||||
"label": "Nagoya protocol restrictions and compliance conditions"},
|
||||
{"attribute": ABS_RELATED_FILES, "label": "ABS related files"},
|
||||
{"attribute": "mta_files", "label": "MTA file"},
|
||||
{"attribute": "other_numbers", "label": "Other culture collection numbers"},
|
||||
"label": "nagoyaConditions"},
|
||||
{"attribute": ABS_RELATED_FILES, "label": "absFile"},
|
||||
{"attribute": "mta_files", "label": "mtaFile"},
|
||||
{"attribute": "other_numbers", "label": "otherCollectionNumbers"},
|
||||
{"attribute": "is_from_registered_collection",
|
||||
"label": "Strain from a registered collection"},
|
||||
{"attribute": "risk_group", "label": "Risk Group"},
|
||||
{"attribute": "is_potentially_harmful", "label": "Dual use"},
|
||||
{"attribute": "is_subject_to_quarantine", "label": "Quarantine in Europe"},
|
||||
{"attribute": "taxonomy.organism_type", "label": "Organism type"},
|
||||
{"attribute": "taxonomy.taxon_name", "label": "Taxon name"},
|
||||
"label": "registeredCollection"},
|
||||
{"attribute": "risk_group", "label": "riskGroup"},
|
||||
{"attribute": "is_potentially_harmful", "label": "dualUse"},
|
||||
{"attribute": "is_subject_to_quarantine", "label": "euQuarantine"},
|
||||
{"attribute": "taxonomy.organism_type", "label": "organismType"},
|
||||
{"attribute": "taxonomy.taxon_name", "label": "speciesName"},
|
||||
{"attribute": "taxonomy.infrasubspecific_name",
|
||||
"label": "Infrasubspecific names"},
|
||||
{"attribute": "taxonomy.comments", "label": "Comment on taxonomy"},
|
||||
"label": "infrasubspecificNames"},
|
||||
{"attribute": "taxonomy.comments", "label": "taxonomyComments"},
|
||||
{"attribute": "taxonomy.interspecific_hybrid",
|
||||
"label": "Interspecific hybrid"},
|
||||
{"attribute": "status", "label": "Status"},
|
||||
{"attribute": "history", "label": "History of deposit", },
|
||||
{"attribute": "deposit.who", "label": "Depositor"},
|
||||
{"attribute": "deposit.date", "label": "Date of deposit"},
|
||||
"label": "hybrid"},
|
||||
{"attribute": "status", "label": "status"},
|
||||
{"attribute": "history", "label": "depositHistory", },
|
||||
{"attribute": "deposit.who", "label": "depositor"},
|
||||
{"attribute": "deposit.date", "label": "depositDate"},
|
||||
{"attribute": "catalog_inclusion_date",
|
||||
"label": "Date of inclusion in the catalogue"},
|
||||
{"attribute": "collect.who", "label": "Collected by"},
|
||||
{"attribute": "collect.date", "label": "Date of collection"},
|
||||
{"attribute": "isolation.who", "label": "Isolated by"},
|
||||
{"attribute": "isolation.date", "label": "Date of isolation"},
|
||||
"label": "accessionDate"},
|
||||
{"attribute": "collect.who", "label": "collector"},
|
||||
{"attribute": "collect.date", "label": "collectionDate"},
|
||||
{"attribute": "isolation.who", "label": "isolator"},
|
||||
{"attribute": "isolation.date", "label": "isolationDate"},
|
||||
{"attribute": "isolation.substrate_host_of_isolation",
|
||||
"label": "Substrate/host of isolation"},
|
||||
"label": "substrate"},
|
||||
{"attribute": "growth.tested_temp_range",
|
||||
"label": "Tested temperature growth range"},
|
||||
"label": "temperatureGrowthRange"},
|
||||
{"attribute": "growth.recommended_temp",
|
||||
"label": "Recommended growth temperature"},
|
||||
"label": "recommendedTemperature"},
|
||||
{"attribute": "growth.recommended_media",
|
||||
"label": "Recommended medium for growth"},
|
||||
{"attribute": "form_of_supply", "label": "Form of supply"},
|
||||
{"attribute": "other_denominations", "label": "Other denomination"},
|
||||
"label": "recommendedMedium"},
|
||||
{"attribute": "form_of_supply", "label": "supplyForms"},
|
||||
{"attribute": "other_denominations", "label": "otherDenomination"},
|
||||
{"attribute": "collect.location.coords",
|
||||
"label": "Coordinates of geographic origin"},
|
||||
"label": "geographicCoordinates"},
|
||||
{"attribute": "collect.site.links",
|
||||
"label": "siteLinks"},
|
||||
{"attribute": "collect.location.altitude",
|
||||
"label": "Altitude of geographic origin"},
|
||||
{"attribute": "collect.location", "label": "Geographic origin"},
|
||||
{"attribute": "collect.habitat", "label": "Isolation habitat"},
|
||||
"label": "country"},
|
||||
{"attribute": "collect.location", "label": "geographicOrigin"},
|
||||
{"attribute": "collect.habitat", "label": "isolationHabitat"},
|
||||
{"attribute": "collect.habitat_ontobiotope",
|
||||
"label": "Ontobiotope term for the isolation habitat"},
|
||||
{"attribute": "genetics.gmo", "label": "GMO"},
|
||||
"label": "ontobiotopeTerms"},
|
||||
{"attribute": "genetics.gmo", "label": "gmo"},
|
||||
{"attribute": "genetics.gmo_construction",
|
||||
"label": "GMO construction information"},
|
||||
{"attribute": "genetics.mutant_info", "label": "Mutant information"},
|
||||
{"attribute": "genetics.genotype", "label": "Genotype"},
|
||||
{"attribute": "genetics.sexual_state", "label": "Sexual state"},
|
||||
{"attribute": "genetics.ploidy", "label": "Ploidy"},
|
||||
{"attribute": "genetics.plasmids", "label": "Plasmids"},
|
||||
"label": "gmoConstruction"},
|
||||
{"attribute": "genetics.mutant_info", "label": "mutant"},
|
||||
{"attribute": "genetics.genotype", "label": "genotype"},
|
||||
{"attribute": "genetics.sexual_state", "label": "sexualState"},
|
||||
{"attribute": "genetics.ploidy", "label": "ploidy"},
|
||||
{"attribute": "genetics.plasmids", "label": "plasmids"},
|
||||
{"attribute": "genetics.plasmids_in_collections",
|
||||
"label": "Plasmids collections fields"},
|
||||
{"attribute": "publications", "label": "Literature"},
|
||||
"label": "plasmidCollections"},
|
||||
{"attribute": "publications", "label": "identificationLiterature"},
|
||||
{"attribute": PLANT_PATHOGENICITY_CODE, "label": "Plant pathogenicity code"},
|
||||
{"attribute": "pathogenicity", "label": "Pathogenicity"},
|
||||
{"attribute": "enzyme_production", "label": "Enzyme production"},
|
||||
{"attribute": "pathogenicity", "label": "pathogenicity"},
|
||||
{"attribute": "enzyme_production", "label": "enzymes"},
|
||||
{"attribute": "production_of_metabolites",
|
||||
"label": "Production of metabolites"},
|
||||
{"attribute": "applications", "label": "Applications", },
|
||||
{"attribute": "remarks", "label": "Remarks"},
|
||||
"label": "metabolites"},
|
||||
{"attribute": "type",
|
||||
"label": "type"},
|
||||
{"attribute": "applications", "label": "applications", },
|
||||
{"attribute": "remarks", "label": "remarks"},
|
||||
{"attribute": LITERATURE_LINKED_TO_SEQ_GENOME,
|
||||
"label": "Literature linked to the sequence/genome"},
|
||||
"label": "sequenceLiterature"},
|
||||
]
|
||||
|
||||
ALLOWED_SUBTAXA = ["subspecies", "variety", "convarietas", "group", "forma",
|
||||
@ -228,8 +240,9 @@ ALLOWED_MARKER_TYPES = [
|
||||
]
|
||||
|
||||
PUBLICATIONS = "publications"
|
||||
PUB_ID = "id"
|
||||
PUB_ID = "pub_id"
|
||||
PUB_DOI = "pub_doi"
|
||||
PUB_PMID = "pub_pmid"
|
||||
PUB_PUBMED_ID = ''
|
||||
PUB_FULL_REFERENCE = "full_reference"
|
||||
PUB_TITLE = "title"
|
||||
@ -247,6 +260,8 @@ BOOK_PUBLISHER = "book_publisher"
|
||||
|
||||
PUBLICATION_FIELDS = [
|
||||
{"label": "ID", "attribute": PUB_ID},
|
||||
{"label": "PMID", "attribute": PUB_PMID},
|
||||
{"label": "DOI", "attribute": PUB_DOI},
|
||||
{"label": "Full reference", "attribute": PUB_FULL_REFERENCE},
|
||||
{"label": "Authors", "attribute": PUB_AUTHORS},
|
||||
{"label": "Title", "attribute": PUB_TITLE},
|
||||
@ -282,15 +297,43 @@ SUBTAXAS = {
|
||||
"f.sp.": "forma.specialis"
|
||||
}
|
||||
|
||||
#Control
|
||||
VERSION = "Version"
|
||||
DATE = "Date"
|
||||
|
||||
|
||||
#Country codes
|
||||
COUNTRY = "Country"
|
||||
CODE = "Code"
|
||||
ADDITIONAL_INFORMATION_ON_THE_COUNTRY_OR_CODE = "Additional information on the country or code"
|
||||
|
||||
|
||||
#Country codes files
|
||||
COUNTRY_CODES_SHEET = [
|
||||
{"label": "Country", "attribute": COUNTRY},
|
||||
{"label": "Code", "attribute": CODE},
|
||||
{"label": "Additional information on the country or code", "attribute": ADDITIONAL_INFORMATION_ON_THE_COUNTRY_OR_CODE},
|
||||
]
|
||||
|
||||
|
||||
#Controle files
|
||||
CONTROL_FIELDS = [
|
||||
{"label": "Version", "attribute": VERSION},
|
||||
{"label": "Date", "attribute": DATE},
|
||||
]
|
||||
|
||||
# Excel sheet name
|
||||
LOCATIONS = "Geographic origin" # 'Locations'
|
||||
GROWTH_MEDIA = "Growth media"
|
||||
GENOMIC_INFO = "Genomic information"
|
||||
STRAINS = "Strains"
|
||||
LITERATURE_SHEET = "Literature"
|
||||
SEXUAL_STATE_SHEET = "Sexual states"
|
||||
SEXUAL_STATE_SHEET = "Sexual state"
|
||||
RESOURCE_TYPES_VALUES = "Resource types values"
|
||||
FORM_OF_SUPPLY_SHEET = "Forms of supply"
|
||||
PLOIDY_SHEET = "Ploidy"
|
||||
ONTOBIOTOPE = "Ontobiotope"
|
||||
MARKERS = "Markers"
|
||||
CONTROL_SHEET = "Version"
|
||||
COUNTRY_CODES_SHEET = "Country codes"
|
||||
RESOURCE_SHEET = 'Resource types values'
|
||||
35
setup.py
35
setup.py
@ -1,35 +0,0 @@
|
||||
import setuptools
from pathlib import Path
from setuptools import find_packages

# The PyPI long description is taken verbatim from the README.
with open("README.md", "r") as readme:
    long_description = readme.read()

# One requirement per line; strip trailing newlines/whitespace.
requirements = [line.strip() for line in open('requirements.txt')]
# Every python file under bin/ ships as an executable script.
scripts = [str(script) for script in Path('./bin').glob('*.py')]

setuptools.setup(
    name="Mirri utils",
    version=0.1,
    author="P.Ziarsolo",
    author_email="pziarsolo@gmail.com",
    description="A small library to help dealing with MIRRI data",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/pziarsolo/mirri_utils",
    packages=find_packages(),
    package_data={"mirri": ['data/ontobiotopes.csv']},
    install_requires=requirements,
    scripts=scripts,
    license="GNU General Public License v3.0",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
)
|
||||
@ -1,22 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from mirri.biolomics.remote.rest_client import BiolomicsClient
|
||||
try:
|
||||
from mirri.biolomics.secrets import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
'You need a secrets.py in the project dir. with CLIENT_ID, SECRET_ID, USERNAME, PASSWORD')
|
||||
|
||||
from .utils import VERSION, SERVER_URL
|
||||
|
||||
|
||||
class BiolomicsClientAuthTest(unittest.TestCase):
    """Authentication behaviour of the Biolomics REST client."""

    def test_authentication(self):
        # The client is expected to cache its OAuth token: two consecutive
        # calls must return the very same, non-null access token.
        client = BiolomicsClient(SERVER_URL, VERSION, CLIENT_ID, SECRET_ID,
                                 USERNAME, PASSWORD)
        first_token = client.get_access_token()
        second_token = client.get_access_token()
        assert first_token is not None
        self.assertEqual(first_token, second_token)
|
||||
|
||||
@ -1,62 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS
|
||||
from mirri.biolomics.serializers.growth_media import GrowthMedium
|
||||
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
|
||||
from tests.biolomics.utils import SERVER_URL, VERSION
|
||||
|
||||
|
||||
class BiolomicsSequenceClientTest(unittest.TestCase):
    """Integration tests for growth-medium CRUD via BiolomicsMirriClient."""

    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_media_by_id(self):
        record_id = 101
        growth_medium = self.client.retrieve_by_id('growth_medium', record_id)
        self.assertEqual(growth_medium.record_id, record_id)

        self.assertEqual(growth_medium.record_name, 'MA2PH6')

    def test_retrieve_media_by_name(self):
        # BUG FIX: this method used to duplicate the name
        # 'test_retrieve_media_by_id', so the by-id test above was silently
        # shadowed and never run by unittest.
        record_name = 'MA2PH6'
        record_id = 101
        growth_medium = self.client.retrieve_by_name('growth_medium', record_name)
        self.assertEqual(growth_medium.record_id, record_id)
        self.assertEqual(growth_medium.record_name, record_name)

    def test_create_growth_media(self):
        # Run inside a transaction that is always rolled back so the remote
        # database is left untouched.
        self.client.start_transaction()
        try:
            growth_medium = GrowthMedium()
            growth_medium.acronym = 'BBB'
            growth_medium.ingredients = 'alkhdflakhf'
            growth_medium.description = 'desc'

            new_growth_medium = self.client.create(GROWTH_MEDIUM_WS, growth_medium)
            print(new_growth_medium.dict())
        finally:
            self.client.rollback()

    def test_update_growth_media(self):
        self.client.start_transaction()
        try:
            growth_medium = GrowthMedium()
            growth_medium.acronym = 'BBB'
            growth_medium.ingredients = 'alkhdflakhf'
            growth_medium.description = 'desc'
            growth_medium.full_description = 'full'
            new_growth_medium = self.client.create(GROWTH_MEDIUM_WS, growth_medium)

            new_growth_medium.full_description = 'full2'
            # BUG FIX: the original rebound both names to the update() result
            # and then compared the object with itself — an assertion that
            # could never fail.  Compare against the intended new value.
            updated_gm = self.client.update(GROWTH_MEDIUM_WS, new_growth_medium)
            self.assertEqual(updated_gm.full_description, 'full2')

            retrieved = self.client.retrieve_by_id(GROWTH_MEDIUM_WS, new_growth_medium.record_id)
            self.assertEqual(retrieved.full_description, updated_gm.full_description)

        finally:
            self.client.rollback()
|
||||
|
||||
|
||||
@ -1,46 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from .utils import VERSION, SERVER_URL
|
||||
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS
|
||||
from mirri.entities.publication import Publication
|
||||
|
||||
|
||||
class BiolomicsLiteratureClientTest(unittest.TestCase):
    """Integration tests for the bibliography endpoints of the client."""

    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_biblio_by_id(self):
        record_id = 100
        record_name = "Miscellaneous notes on Mucoraceae"
        biblio = self.client.retrieve_by_id(BIBLIOGRAPHY_WS, record_id)
        self.assertEqual(biblio.record_id, record_id)
        self.assertEqual(biblio.record_name, record_name)

    def test_retrieve_biblio_by_name(self):
        # Bug fix: renamed from test_retrieve_media_by_id, a copy-paste
        # leftover that neither matched this file (bibliography, not media)
        # nor the lookup performed (by name, not by id).
        record_id = 100
        record_name = "Miscellaneous notes on Mucoraceae"
        biblio = self.client.retrieve_by_name(BIBLIOGRAPHY_WS, record_name)
        self.assertEqual(biblio.record_id, record_id)
        self.assertEqual(biblio.record_name, record_name)
        self.assertEqual(biblio.year, 1994)
        self.assertEqual(biblio.volume, '50')

    def test_create_biblio(self):
        pub = Publication()
        pub.pubmed_id = 'PM18192'
        pub.journal = 'my_journal'
        pub.title = 'awesome title'
        pub.authors = 'pasdas, aposjdasd, alsalsfda'
        pub.volume = 'volume 0'

        record_id = None
        try:
            new_pub = self.client.create(BIBLIOGRAPHY_WS, pub)
            record_id = new_pub.record_id
            self.assertEqual(new_pub.title, pub.title)
            self.assertEqual(new_pub.volume, pub.volume)
        finally:
            # Always remove the record created on the remote server.
            if record_id is not None:
                self.client.delete_by_id(BIBLIOGRAPHY_WS, record_id)
|
||||
@ -1,49 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
|
||||
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
|
||||
from .utils import VERSION, SERVER_URL
|
||||
|
||||
|
||||
class BiolomicsSequenceClientTest(unittest.TestCase):
    """Integration tests for the genomic-sequence endpoints of the client."""

    def setUp(self) -> None:
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_seq_by_id(self):
        record_id = 101
        sequence = self.client.retrieve_by_id('sequence', record_id)

        self.assertEqual(sequence.record_id, record_id)
        self.assertEqual(sequence.record_name, 'MUM 02.54 - CaM')
        self.assertEqual(sequence.marker_type, 'CaM')

    def test_retrieve_seq_by_name(self):
        record_name = 'MUM 02.54 - CaM'
        sequence = self.client.retrieve_by_name('sequence', record_name)

        self.assertEqual(sequence.record_id, 101)
        self.assertEqual(sequence.record_name, record_name)
        self.assertEqual(sequence.marker_type, 'CaM')

    def test_create_delete_sequence(self):
        marker = GenomicSequenceBiolomics()
        marker.marker_id = 'GGAAUUA'
        marker.marker_seq = 'aattgacgat'
        marker.marker_type = 'CaM'
        marker.record_name = 'peioMarker'

        new_marker = self.client.create('sequence', marker)
        try:
            self.assertEqual(new_marker.marker_id, 'GGAAUUA')
            self.assertEqual(new_marker.marker_seq, 'aattgacgat')
            self.assertEqual(new_marker.marker_type, 'CaM')
            self.assertEqual(new_marker.record_name, 'peioMarker')
            self.assertTrue(new_marker.record_id)
        finally:
            # Bug fix: deletion now runs even when an assertion fails, so a
            # failing run does not leave test records on the remote server.
            if new_marker.record_id:
                self.client.delete_by_id('sequence', new_marker.record_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # To run a single case, filter via sys.argv, e.g.
    # sys.argv = ['', 'BiolomicsClient.Test.test_get_strain_by_id']
    unittest.main()
|
||||
@ -1,727 +0,0 @@
|
||||
import unittest
|
||||
import pycountry
|
||||
import deepdiff
|
||||
from pprint import pprint
|
||||
from mirri.biolomics.serializers.sequence import (
|
||||
GenomicSequenceBiolomics,
|
||||
serialize_to_biolomics as sequence_to_biolomics,
|
||||
serialize_from_biolomics as sequence_from_biolomics)
|
||||
|
||||
from mirri.biolomics.serializers.strain import (
|
||||
serialize_to_biolomics as strain_to_biolomics,
|
||||
serialize_from_biolomics as strain_from_biolomics)
|
||||
from mirri.biolomics.serializers.growth_media import (
|
||||
# serialize_to_biolomics as growth_medium_to_biolomics,
|
||||
serialize_from_biolomics as growth_medium_from_biolomics)
|
||||
from mirri.biolomics.serializers.bibliography import (
|
||||
serializer_from_biolomics as literature_from_biolomics,
|
||||
serializer_to_biolomics as literature_to_biolomics
|
||||
)
|
||||
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
|
||||
from mirri.entities.publication import Publication
|
||||
from .utils import create_full_data_strain, VERSION, SERVER_URL
|
||||
|
||||
|
||||
STRAIN_WS = {
|
||||
'CreationDate': '2021-05-19T12:22:33',
|
||||
'CreatorUserName': 'pziarsolo@cect.org',
|
||||
'LastChangeDate': '2021-05-19T12:22:36',
|
||||
'LastChangeUserName': 'pziarsolo@cect.org',
|
||||
'RecordDetails': {'ABS related files': {'FieldType': 21,
|
||||
'Value': [{'Name': 'link',
|
||||
'Value': 'https://example.com'}]},
|
||||
'Altitude of geographic origin': {'FieldType': 4,
|
||||
'Value': 121.0},
|
||||
'Applications': {'FieldType': 5, 'Value': 'health'},
|
||||
'Catalog URL': {'FieldType': 21, 'Value': []},
|
||||
'Collection accession number': {'FieldType': 5,
|
||||
'Value': 'TESTCC 1'},
|
||||
'Collection date': {'FieldType': 8, 'Value': '1991/01/01'},
|
||||
'Collector': {'FieldType': 5, 'Value': 'the collector'},
|
||||
'Comment on taxonomy': {'FieldType': 5,
|
||||
'Value': 'lalalalla'},
|
||||
'Coordinates of geographic origin': {'FieldType': 12,
|
||||
'Value': {'Altitude': 0.0,
|
||||
'Latitude': 23.3,
|
||||
'Longitude': 23.3,
|
||||
'Precision': 0.0}},
|
||||
'Country': {'FieldType': 118,
|
||||
'Value': [{'Name': {'FieldType': 5,
|
||||
'Value': 'Spain'},
|
||||
'RecordId': 54,
|
||||
'TargetFieldValue': None}]},
|
||||
'Data provided by': {'FieldType': 22, 'Value': 'Unknown'},
|
||||
'Date of inclusion in the catalogue': {'FieldType': 8,
|
||||
'Value': '1985/05/02'},
|
||||
'Deposit date': {'FieldType': 8, 'Value': '1985/05/02'},
|
||||
'Depositor': {'FieldType': 5,
|
||||
'Value': 'NCTC, National Collection of Type '
|
||||
'Cultures - NCTC, London, United '
|
||||
'Kingdom of Great Britain and '
|
||||
'Northern Ireland.'},
|
||||
'Dual use': {'FieldType': 20, 'Value': 'yes'},
|
||||
'Enzyme production': {'FieldType': 5,
|
||||
'Value': 'some enzimes'},
|
||||
'Form': {'FieldType': 3,
|
||||
'Value': [{'Name': 'Agar', 'Value': 'yes'},
|
||||
{'Name': 'Cryo', 'Value': 'no'},
|
||||
{'Name': 'Dry Ice', 'Value': 'no'},
|
||||
{'Name': 'Liquid Culture Medium',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Lyo', 'Value': 'yes'},
|
||||
{'Name': 'Oil', 'Value': 'no'},
|
||||
{'Name': 'Water', 'Value': 'no'}]},
|
||||
'GMO': {'FieldType': 22, 'Value': 'Yes'},
|
||||
'GMO construction information': {'FieldType': 5,
|
||||
'Value': 'instructrion to '
|
||||
'build'},
|
||||
'Genotype': {'FieldType': 5, 'Value': 'some genotupe'},
|
||||
'Geographic origin': {'FieldType': 5,
|
||||
'Value': 'una state; one '
|
||||
'municipality; somewhere in '
|
||||
'the world'},
|
||||
'History': {'FieldType': 5,
|
||||
'Value': 'newer < In the middle < older'},
|
||||
'Infrasubspecific names': {'FieldType': 5,
|
||||
'Value': 'serovar tete'},
|
||||
'Interspecific hybrid': {'FieldType': 20, 'Value': 'no'},
|
||||
'Isolation date': {'FieldType': 8, 'Value': '1900/01/01'},
|
||||
'Isolation habitat': {'FieldType': 5,
|
||||
'Value': 'some habitat'},
|
||||
'Isolator': {'FieldType': 5, 'Value': 'the isolator'},
|
||||
'Literature': {'FieldType': 118, 'Value': []},
|
||||
'MTA files URL': {'FieldType': 21,
|
||||
'Value': [{'Name': 'link',
|
||||
'Value': 'https://example.com'}]},
|
||||
'MTA text': {'FieldType': 5, 'Value': ''},
|
||||
'Metabolites production': {'FieldType': 5,
|
||||
'Value': 'big factory of cheese'},
|
||||
'Mutant information': {'FieldType': 5, 'Value': 'x-men'},
|
||||
'Nagoya protocol restrictions and compliance conditions': {'FieldType': 20,
|
||||
'Value': 'no '
|
||||
'known '
|
||||
'restrictions '
|
||||
'under '
|
||||
'the '
|
||||
'Nagoya '
|
||||
'protocol'},
|
||||
'Ontobiotope': {'FieldType': 118,
|
||||
'Value': [{'Name': {'FieldType': 5,
|
||||
'Value': 'anaerobic '
|
||||
'bioreactor '
|
||||
'(OBT:000190)'},
|
||||
'RecordId': 100,
|
||||
'TargetFieldValue': None}]},
|
||||
'Ontobiotope term for the isolation habitat': {'FieldType': 5,
|
||||
'Value': ''},
|
||||
'Orders': {'FieldType': 118, 'Value': []},
|
||||
'Organism type': {'FieldType': 3,
|
||||
'Value': [{'Name': 'Algae', 'Value': 'no'},
|
||||
{'Name': 'Archaea',
|
||||
'Value': 'yes'},
|
||||
{'Name': 'Bacteria',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Cyanobacteria',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Filamentous Fungi',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Phage', 'Value': 'no'},
|
||||
{'Name': 'Plasmid',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Virus', 'Value': 'no'},
|
||||
{'Name': 'Yeast', 'Value': 'no'},
|
||||
{'Name': 'Microalgae',
|
||||
'Value': '?'}]},
|
||||
'Other culture collection numbers': {'FieldType': 5,
|
||||
'Value': 'aaa a; aaa3 '
|
||||
'a3'},
|
||||
'Other denomination': {'FieldType': 5, 'Value': ''},
|
||||
'Pathogenicity': {'FieldType': 5, 'Value': 'illness'},
|
||||
'Plasmids': {'FieldType': 5, 'Value': 'asda'},
|
||||
'Plasmids collections fields': {'FieldType': 5,
|
||||
'Value': 'asdasda'},
|
||||
'Ploidy': {'FieldType': 20, 'Value': 'Polyploid'},
|
||||
'Quarantine in Europe': {'FieldType': 20, 'Value': 'no'},
|
||||
'Recommended growth medium': {'FieldType': 118,
|
||||
'Value': [{'Name': {'FieldType': 5,
|
||||
'Value': 'AAA'},
|
||||
'RecordId': 1,
|
||||
'TargetFieldValue': None}]},
|
||||
'Recommended growth temperature': {'FieldType': 19,
|
||||
'MaxValue': 30.0,
|
||||
'MinValue': 30.0},
|
||||
'Remarks': {'FieldType': 5, 'Value': 'no remarks for me'},
|
||||
'Restrictions on use': {'FieldType': 20,
|
||||
'Value': 'no restriction apply'},
|
||||
'Risk group': {'FieldType': 20, 'Value': '1'},
|
||||
'Sequences 16s': {"Value": [
|
||||
{
|
||||
"Name": {
|
||||
"Value": "X76436",
|
||||
"FieldType": 5
|
||||
},
|
||||
"RecordId": 50992,
|
||||
"TargetFieldValue": {
|
||||
"Value": {
|
||||
"Sequence": ""
|
||||
},
|
||||
"FieldType": 14
|
||||
}
|
||||
}
|
||||
],
|
||||
"FieldType": 114},
|
||||
'Sequences 18S rRNA': {'FieldType': 114, 'Value': []},
|
||||
'Sequences 23S rRNA': {'FieldType': 114, 'Value': []},
|
||||
'Sequences ACT': {'FieldType': 114, 'Value': []},
|
||||
'Sequences AmdS': {'FieldType': 114, 'Value': []},
|
||||
'Sequences Amds12': {'FieldType': 114, 'Value': []},
|
||||
'Sequences Beta tubulin': {'FieldType': 114, 'Value': []},
|
||||
'Sequences COX1': {'FieldType': 114, 'Value': []},
|
||||
'Sequences COX2': {'FieldType': 114, 'Value': []},
|
||||
'Sequences CaM': {'FieldType': 114, 'Value': []},
|
||||
'Sequences Cct8': {'FieldType': 114, 'Value': []},
|
||||
'Sequences Cit1': {'FieldType': 114, 'Value': []},
|
||||
'Sequences CypA': {'FieldType': 114, 'Value': []},
|
||||
'Sequences GDP': {'FieldType': 114, 'Value': []},
|
||||
'Sequences GPD': {'FieldType': 114, 'Value': []},
|
||||
'Sequences Genome': {'FieldType': 114, 'Value': []},
|
||||
'Sequences HIS': {'FieldType': 114, 'Value': []},
|
||||
'Sequences HSP': {'FieldType': 114, 'Value': []},
|
||||
'Sequences IDH': {'FieldType': 114, 'Value': []},
|
||||
'Sequences IGS': {'FieldType': 114, 'Value': []},
|
||||
'Sequences ITS': {'FieldType': 114, 'Value': []},
|
||||
'Sequences LSU': {'FieldType': 114, 'Value': []},
|
||||
'Sequences MAT': {'FieldType': 114, 'Value': []},
|
||||
'Sequences MAT1': {'FieldType': 114, 'Value': []},
|
||||
'Sequences Miscellaneous': {'FieldType': 114, 'Value': []},
|
||||
'Sequences NorA': {'FieldType': 114, 'Value': []},
|
||||
'Sequences NorB': {'FieldType': 114, 'Value': []},
|
||||
'Sequences Omt12': {'FieldType': 114, 'Value': []},
|
||||
'Sequences OmtA': {'FieldType': 114, 'Value': []},
|
||||
'Sequences PcCYP': {'FieldType': 114, 'Value': []},
|
||||
'Sequences PpgA': {'FieldType': 114, 'Value': []},
|
||||
'Sequences PreA': {'FieldType': 114, 'Value': []},
|
||||
'Sequences PreB': {'FieldType': 114, 'Value': []},
|
||||
'Sequences RAPD': {'FieldType': 114, 'Value': []},
|
||||
'Sequences RPB1': {'FieldType': 114, 'Value': []},
|
||||
'Sequences RPB2': {'FieldType': 114, 'Value': []},
|
||||
'Sequences SSU': {'FieldType': 114, 'Value': []},
|
||||
'Sequences TEF1a': {'FieldType': 114, 'Value': []},
|
||||
'Sequences TEF2': {'FieldType': 114, 'Value': []},
|
||||
'Sequences TUB': {'FieldType': 114, 'Value': []},
|
||||
'Sequences Tsr1': {'FieldType': 114, 'Value': []},
|
||||
'Sequences c16S rRNA': {'FieldType': 114, 'Value': []},
|
||||
'Sequences cbhI': {'FieldType': 114, 'Value': []},
|
||||
'Sequences mcm7': {'FieldType': 114, 'Value': []},
|
||||
'Sequences rbcL': {'FieldType': 114, 'Value': []},
|
||||
'Sexual state': {'FieldType': 5, 'Value': 'MT+A'},
|
||||
'Status': {'FieldType': 5,
|
||||
'Value': 'type of Bacillus alcalophilus'},
|
||||
'Strain from a registered collection': {'FieldType': 20,
|
||||
'Value': 'no'},
|
||||
'Substrate of isolation': {'FieldType': 5,
|
||||
'Value': 'some substrate'},
|
||||
'Taxon name': {'FieldType': 109,
|
||||
'Value': [{'Name': {'FieldType': 5,
|
||||
'Value': 'Escherichia '
|
||||
'coli'},
|
||||
'RecordId': 100004123,
|
||||
'TargetFieldValue': {'DesktopInfo': None,
|
||||
'DesktopInfoHtml': '<b>Current '
|
||||
'name: '
|
||||
'</b><i>Escherichia '
|
||||
'coli</i> '
|
||||
'(Migula '
|
||||
'1895) '
|
||||
'Castellani '
|
||||
'and '
|
||||
'Chalmers '
|
||||
'1919',
|
||||
'FieldType': 27,
|
||||
'NewSynFieldInfo': None,
|
||||
'ObligateSynonymId': 0,
|
||||
'OriginalSynFieldInfo': None,
|
||||
'SynInfo': {'BasionymRecord': {'NameInfo': '',
|
||||
'RecordId': 100004123,
|
||||
'RecordName': '<i>Escherichia '
|
||||
'coli</i> '
|
||||
'(Migula '
|
||||
'1895) '
|
||||
'Castellani '
|
||||
'and '
|
||||
'Chalmers '
|
||||
'1919',
|
||||
'SecondLevelRecords': None},
|
||||
'CurrentNameRecord': {'NameInfo': '',
|
||||
'RecordId': 100004123,
|
||||
'RecordName': '<i>Escherichia '
|
||||
'coli</i> '
|
||||
'(Migula '
|
||||
'1895) '
|
||||
'Castellani '
|
||||
'and '
|
||||
'Chalmers '
|
||||
'1919',
|
||||
'SecondLevelRecords': None},
|
||||
'ObligateSynonymRecords': [],
|
||||
'SelectedRecord': {
|
||||
'NameInfo': '<i>Escherichia '
|
||||
'coli</i> '
|
||||
'(Migula '
|
||||
'1895) '
|
||||
'Castellani '
|
||||
'and '
|
||||
'Chalmers '
|
||||
'1919',
|
||||
'RecordId': 100004123,
|
||||
'RecordName': '<i>Escherichia '
|
||||
'coli</i> '
|
||||
'(Migula '
|
||||
'1895) '
|
||||
'Castellani '
|
||||
'and '
|
||||
'Chalmers '
|
||||
'1919',
|
||||
'SecondLevelRecords': None},
|
||||
'TaxonSynonymsRecords': []},
|
||||
'SynonymId': 100004123}}]},
|
||||
'Tested temperature growth range': {'FieldType': 19,
|
||||
'MaxValue': 32.0,
|
||||
'MinValue': 29.0},
|
||||
'Type description': {'FieldType': 5, 'Value': ''}},
|
||||
'RecordId': 148038,
|
||||
'RecordName': 'MIRRI 2240561'}
|
||||
|
||||
STRAIN_WS_EXPECTED_NO_REMOTE = {
|
||||
'Acronym': 'MIRRI',
|
||||
'RecordDetails': {'ABS related files': {'FieldType': 'U',
|
||||
'Value': [{'Name': 'link',
|
||||
'Value': 'https://example.com'}]},
|
||||
'Altitude of geographic origin': {'FieldType': 'D',
|
||||
'Value': 121},
|
||||
'Applications': {'FieldType': 'E', 'Value': 'health'},
|
||||
'Collection accession number': {'FieldType': 'E',
|
||||
'Value': 'TESTCC 1'},
|
||||
'Collection date': {'FieldType': 'H', 'Value': '1991-01-01'},
|
||||
'Collector': {'FieldType': 'E', 'Value': 'the collector'},
|
||||
'Comment on taxonomy': {'FieldType': 'E',
|
||||
'Value': 'lalalalla'},
|
||||
'Coordinates of geographic origin': {'FieldType': 'L',
|
||||
'Value': {'Latitude': 23.3,
|
||||
'Longitude': 23.3}},
|
||||
'Date of inclusion in the catalogue': {'FieldType': 'H',
|
||||
'Value': '1985-05-02'},
|
||||
'Deposit date': {'FieldType': 'H', 'Value': '1985-05-02'},
|
||||
'Depositor': {'FieldType': 'E',
|
||||
'Value': 'NCTC, National Collection of Type '
|
||||
'Cultures - NCTC, London, United '
|
||||
'Kingdom of Great Britain and '
|
||||
'Northern Ireland.'},
|
||||
'Dual use': {'FieldType': 'T', 'Value': 'yes'},
|
||||
'Enzyme production': {'FieldType': 'E',
|
||||
'Value': 'some enzimes'},
|
||||
'Form': {'FieldType': 'C',
|
||||
'Value': [{'Name': 'Agar', 'Value': 'yes'},
|
||||
{'Name': 'Cryo', 'Value': 'no'},
|
||||
{'Name': 'Dry Ice', 'Value': 'no'},
|
||||
{'Name': 'Liquid Culture Medium',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Lyo', 'Value': 'yes'},
|
||||
{'Name': 'Oil', 'Value': 'no'},
|
||||
{'Name': 'Water', 'Value': 'no'}]},
|
||||
'GMO': {'FieldType': 'V', 'Value': 'Yes'},
|
||||
'GMO construction information': {'FieldType': 'E',
|
||||
'Value': 'instructrion to '
|
||||
'build'},
|
||||
'Genotype': {'FieldType': 'E', 'Value': 'some genotupe'},
|
||||
'Geographic origin': {'FieldType': 'E',
|
||||
'Value': 'una state; one '
|
||||
'municipality; somewhere in '
|
||||
'the world'},
|
||||
'History': {'FieldType': 'E',
|
||||
'Value': 'firstplave < seconn place < third '
|
||||
'place'},
|
||||
'Infrasubspecific names': {'FieldType': 'E',
|
||||
'Value': 'serovar tete'},
|
||||
'Interspecific hybrid': {'FieldType': 'T', 'Value': 'no'},
|
||||
'Isolation date': {'FieldType': 'H', 'Value': '1900-01-01'},
|
||||
'Isolation habitat': {'FieldType': 'E',
|
||||
'Value': 'some habitat'},
|
||||
'Isolator': {'FieldType': 'E', 'Value': 'the isolator'},
|
||||
'MTA files URL': {'FieldType': 'U',
|
||||
'Value': [{'Name': 'link',
|
||||
'Value': 'https://example.com'}]},
|
||||
'Metabolites production': {'FieldType': 'E',
|
||||
'Value': 'big factory of cheese'},
|
||||
'Mutant information': {'FieldType': 'E', 'Value': 'x-men'},
|
||||
'Nagoya protocol restrictions and compliance conditions': {'FieldType': 'T',
|
||||
'Value': 'no '
|
||||
'known '
|
||||
'restrictions '
|
||||
'under '
|
||||
'the '
|
||||
'Nagoya '
|
||||
'protocol'},
|
||||
'Ontobiotope': {'FieldType': 'RLink', 'Value': 'OBT:000190'},
|
||||
'Organism type': {'FieldType': 'C',
|
||||
'Value': [{'Name': 'Algae', 'Value': 'no'},
|
||||
{'Name': 'Archaea',
|
||||
'Value': 'yes'},
|
||||
{'Name': 'Bacteria',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Cyanobacteria',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Filamentous Fungi',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Phage', 'Value': 'no'},
|
||||
{'Name': 'Plasmid',
|
||||
'Value': 'no'},
|
||||
{'Name': 'Virus', 'Value': 'no'},
|
||||
{'Name': 'Yeast',
|
||||
'Value': 'no'}]},
|
||||
'Other culture collection numbers': {'FieldType': 'E',
|
||||
'Value': 'aaa a; aaa3 '
|
||||
'a3'},
|
||||
'Pathogenicity': {'FieldType': 'E', 'Value': 'illness'},
|
||||
'Plasmids': {'FieldType': 'E', 'Value': 'asda'},
|
||||
'Plasmids collections fields': {'FieldType': 'E',
|
||||
'Value': 'asdasda'},
|
||||
'Ploidy': {'FieldType': 'T', 'Value': 'Polyploid'},
|
||||
'Quarantine in Europe': {'FieldType': 'T', 'Value': 'no'},
|
||||
'Recommended growth temperature': {'FieldType': 'S',
|
||||
'MaxValue': 30.0,
|
||||
'MinValue': 30.0},
|
||||
'Remarks': {'FieldType': 'E', 'Value': 'no remarks for me'},
|
||||
'Restrictions on use': {'FieldType': 'T',
|
||||
'Value': 'no restriction apply'},
|
||||
'Risk group': {'FieldType': 'T', 'Value': '1'},
|
||||
'Sexual state': {'FieldType': 'E', 'Value': 'MT+A'},
|
||||
'Status': {'FieldType': 'E',
|
||||
'Value': 'type of Bacillus alcalophilus'},
|
||||
'Strain from a registered collection': {'FieldType': 'T',
|
||||
'Value': 'no'},
|
||||
'Substrate of isolation': {'FieldType': 'E',
|
||||
'Value': 'some substrate'},
|
||||
'Taxon name': {'FieldType': 'SynLink',
|
||||
'Value': 'Escherichia coli'},
|
||||
'Tested temperature growth range': {'FieldType': 'S',
|
||||
'MaxValue': 32.0,
|
||||
'MinValue': 29.0}}}
|
||||
|
||||
|
||||
class StrainSerializerTest(unittest.TestCase):
    """Round-trip tests for the strain <-> Biolomics payload serializers."""

    def test_serialize_to_biolomics(self):
        # With no client, no remote lookups happen, so the serialized
        # payload must match the static expectation exactly.
        strain = create_full_data_strain()
        serialized = strain_to_biolomics(strain, client=None)
        self.assertDictEqual(serialized, STRAIN_WS_EXPECTED_NO_REMOTE)

    def test_serialize_to_biolomics_remote(self):
        client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                      SECRET_ID, USERNAME, PASSWORD)
        strain = create_full_data_strain()

        marker = GenomicSequenceBiolomics()
        marker.marker_id = "MUM 02.15 - Beta tubulin"
        marker.marker_type = 'TUBB'
        strain.genetics.markers = [marker]

        serialized = strain_to_biolomics(strain, client=client)
        details = serialized['RecordDetails']

        self.assertEqual(strain.collect.habitat_ontobiotope,
                         details['Ontobiotope']['Value'][0]['Name']['Value'])
        country_name = pycountry.countries.get(
            alpha_3=strain.collect.location.country).name
        self.assertEqual(country_name,
                         details['Country']['Value'][0]['Name']['Value'])
        self.assertEqual(strain.publications[0].title,
                         details['Literature']['Value'][0]['Name']['Value'])
        self.assertEqual(strain.genetics.markers[0].marker_id,
                         details['Sequences TUB']['Value'][0]['Name']['Value'])

    def test_serialize_from_biolomics(self):
        strain = strain_from_biolomics(STRAIN_WS)

        self.assertEqual(strain.record_id, 148038)
        self.assertEqual(strain.record_name, 'MIRRI 2240561')
        self.assertEqual(strain.taxonomy.long_name, 'Escherichia coli')
        self.assertEqual(strain.growth.recommended_media, ['AAA'])
        self.assertEqual(strain.collect.location.altitude, 121)
        self.assertEqual(strain.collect.location.country, 'ESP')
        self.assertEqual(strain.applications, 'health')
        self.assertEqual(strain.id.strain_id, 'TESTCC 1')
        self.assertEqual(strain.collect.date.strfdate, '19910101')
        self.assertEqual(strain.taxonomy.comments, 'lalalalla')
        self.assertEqual(strain.catalog_inclusion_date.strfdate, '19850502')
        self.assertIn('NCTC, National Collection of Type ', strain.deposit.who)
        self.assertTrue(strain.is_potentially_harmful)
        self.assertEqual(strain.form_of_supply, ['Agar', 'Lyo'])
        self.assertTrue(strain.genetics.gmo)
        self.assertEqual(strain.genetics.gmo_construction,
                         'instructrion to build')
        self.assertEqual(strain.genetics.genotype, 'some genotupe')
        self.assertEqual(strain.history, ['newer', 'In the middle', 'older'])
        self.assertEqual(strain.taxonomy.infrasubspecific_name, 'serovar tete')
        self.assertEqual(strain.isolation.who, 'the isolator')
        self.assertEqual(strain.isolation.date.strfdate, '19000101')
        self.assertEqual(strain.mta_files, ['https://example.com'])
        self.assertEqual(strain.genetics.mutant_info, 'x-men')
        self.assertEqual(strain.collect.habitat_ontobiotope, 'OBT:000190')
        self.assertEqual(strain.taxonomy.organism_type[0].name, 'Archaea')
        self.assertEqual(strain.other_numbers[0].strain_id, 'aaa a')
        self.assertEqual(strain.other_numbers[1].strain_id, 'aaa3 a3')
        self.assertEqual(strain.pathogenicity, 'illness')
        self.assertEqual(strain.genetics.plasmids, ['asda'])
        self.assertEqual(strain.genetics.ploidy, 9)
        self.assertFalse(strain.is_subject_to_quarantine)
        self.assertEqual(strain.risk_group, '1')
        self.assertFalse(strain.is_from_registered_collection)
        self.assertEqual(strain.growth.tested_temp_range,
                         {'min': 29, 'max': 32})
|
||||
|
||||
|
||||
# Fixture: raw Biolomics web-service payload for one genomic sequence record.
BIOLOMICSSEQ = {
    'RecordDetails': {
        'Barcode level': {'FieldType': 20, 'Value': 'undefined'},
        'DNA extract number': {'FieldType': 5, 'Value': ''},
        'DNA sequence': {'FieldType': 14,
                         'Value': {'Sequence': 'caaaggaggccttctccctcttcgtaag'}},
        'Editing state': {'FieldType': 20, 'Value': 'Auto import'},
        'Forward primer(s)': {'FieldType': 5, 'Value': ''},
        'Genbank': {'FieldType': 21, 'Value': []},
        'INSDC number': {'FieldType': 5, 'Value': 'AATGAT'},
        'Literature': {'FieldType': 21, 'Value': []},
        'Literature1': {'FieldType': 118, 'Value': []},
        'Marker name': {'FieldType': 5, 'Value': 'CaM'},
        'Privacy': {'FieldType': 20, 'Value': 'undefined'},
        'Quality': {'FieldType': 5, 'Value': ''},
        'Remarks': {'FieldType': 5, 'Value': ''},
        'Reverse primer(s)': {'FieldType': 5, 'Value': ''},
        'Review state': {'FieldType': 5, 'Value': ''},
        'Strain number': {'FieldType': 5, 'Value': 'MUM 02.54'},
    },
    'RecordId': 101,
    'RecordName': 'MUM 02.54 - CaM',
}
|
||||
|
||||
|
||||
class SequenceSerializerTest(unittest.TestCase):
    """Tests for the genomic sequence <-> Biolomics payload serializers."""

    def test_from_biolomics(self):
        details = BIOLOMICSSEQ['RecordDetails']
        marker = sequence_from_biolomics(BIOLOMICSSEQ)

        self.assertEqual(marker.record_name, BIOLOMICSSEQ['RecordName'])
        self.assertEqual(marker.record_id, BIOLOMICSSEQ['RecordId'])
        self.assertEqual(marker.marker_type, details['Marker name']['Value'])
        self.assertEqual(marker.marker_id, details['INSDC number']['Value'])
        self.assertEqual(marker.marker_seq,
                         details['DNA sequence']['Value']['Sequence'])

    def test_to_biolomics(self):
        marker = GenomicSequenceBiolomics()
        marker.marker_id = 'GGAAUUA'
        marker.marker_seq = 'aattgacgat'
        marker.marker_type = 'CaM'
        marker.record_name = 'peioMarker'
        marker.record_id = 111

        expected = {
            'RecordId': marker.record_id,
            'RecordName': marker.record_name,
            'RecordDetails': {
                'INSDC number': {'Value': marker.marker_id, 'FieldType': 'E'},
                'DNA sequence': {'Value': {'Sequence': marker.marker_seq},
                                 'FieldType': 'N'},
                'Marker name': {'Value': marker.marker_type,
                                'FieldType': 'E'},
            },
        }
        self.assertEqual(sequence_to_biolomics(marker), expected)
|
||||
|
||||
|
||||
# Fixture: raw Biolomics web-service payload for one growth medium record.
BIOLOMICS_MEDIUM = {
    'RecordId': 100,
    'RecordName': 'MA20S',
    'RecordDetails': {
        'Full description': {'Value': 'mout agar+20% saccharose',
                             'FieldType': 5},
        'Ingredients': {
            'Value': 'Malt extract\r\n\tDilute brewery malt with water to '
                     '10% sugar solution (level 10 on Brix saccharose '
                     'meter), 15 minutes at 121 C\r\nsaccharose\t200g\r\n'
                     'distilled water\t0.6l\r\nagar\t15g\r\n',
            'FieldType': 5},
        'Link to full description': {'Value': [], 'FieldType': 21},
        'Medium description': {'Value': '', 'FieldType': 5},
        'Other name': {'Value': '', 'FieldType': 5},
        'pH': {'Value': '7 with KOH', 'FieldType': 5},
        'Remarks': {'Value': '', 'FieldType': 5},
        'Reference': {'Value': '', 'FieldType': 5},
        'Sterilization conditions': {'Value': '15 minutes at 121 C',
                                     'FieldType': 5},
    },
}
|
||||
|
||||
|
||||
class MediumSerializerTest(unittest.TestCase):
    """Tests for the growth-medium deserializer."""

    def test_from_biolomics(self):
        details = BIOLOMICS_MEDIUM['RecordDetails']
        medium = growth_medium_from_biolomics(BIOLOMICS_MEDIUM)

        self.assertEqual(medium.record_id, BIOLOMICS_MEDIUM['RecordId'])
        self.assertEqual(medium.record_name, BIOLOMICS_MEDIUM['RecordName'])
        self.assertEqual(medium.ingredients, details['Ingredients']['Value'])
        self.assertEqual(medium.full_description,
                         details['Full description']['Value'])
        self.assertEqual(medium.ph, details['pH']['Value'])
|
||||
|
||||
|
||||
# Fixture: raw Biolomics web-service payload for one bibliography record.
BIOLOMICS_BIBLIOGRAPHY = {
    'RecordId': 100,
    'RecordName': 'Miscellaneous notes on Mucoraceae',
    'RecordDetails': {
        'Associated strains': {'Value': [], 'FieldType': 118},
        'Associated taxa': {'Value': [], 'FieldType': 118},
        'Authors': {'Value': 'Schipper, M.A.A.; Samson, R.A.',
                    'FieldType': 5},
        'Associated sequences': {'Value': [], 'FieldType': 118},
        'Abstract': {'Value': '', 'FieldType': 5},
        'Collection': {'Value': '', 'FieldType': 5},
        'DOI number': {'Value': '', 'FieldType': 5},
        'Editor(s)': {'Value': '', 'FieldType': 5},
        'Full reference': {'Value': '', 'FieldType': 5},
        'Hyperlink': {'Value': [], 'FieldType': 21},
        'ISBN': {'Value': '', 'FieldType': 5},
        'ISSN': {'Value': '', 'FieldType': 5},
        'Issue': {'Value': '', 'FieldType': 5},
        'Journal': {'Value': 'Mycotaxon', 'FieldType': 5},
        'Journal-Book': {'Value': '', 'FieldType': 5},
        'Keywords': {'Value': '', 'FieldType': 5},
        'Page from': {'Value': '475', 'FieldType': 5},
        'Page to': {'Value': '491', 'FieldType': 5},
        'Publisher': {'Value': '', 'FieldType': 5},
        'PubMed ID': {'Value': '', 'FieldType': 5},
        'Volume': {'Value': '50', 'FieldType': 5},
        'Year': {'Value': 1994, 'FieldType': 4},
    },
}
|
||||
|
||||
|
||||
class BibliographySerializerTest(unittest.TestCase):
    """Tests for the bibliography <-> Biolomics payload serializers."""

    def test_from_biolomics(self):
        pub = literature_from_biolomics(BIOLOMICS_BIBLIOGRAPHY)

        self.assertEqual(pub.record_name, "Miscellaneous notes on Mucoraceae")
        self.assertEqual(pub.record_id, 100)
        self.assertEqual(pub.year, 1994)
        self.assertEqual(pub.authors, "Schipper, M.A.A.; Samson, R.A.")

    def test_to_biolomics(self):
        pub = Publication()
        pub.title = 'My title'
        pub.year = 1992
        pub.authors = 'me and myself'
        pub.pubmed_id = '1112222'
        pub.issue = 'issue'

        expected = {
            'RecordDetails': {
                'Authors': {'FieldType': 'E', 'Value': 'me and myself'},
                'PubMed ID': {'FieldType': 'E', 'Value': '1112222'},
                'Issue': {'FieldType': 'E', 'Value': 'issue'},
                'Year': {'FieldType': 'D', 'Value': 1992}},
            'RecordName': 'My title'}
        self.assertDictEqual(expected, literature_to_biolomics(pub))

    def test_to_biolomics2(self):
        # Without a title, the record name falls back to an identifier.
        pub = Publication()
        pub.pubmed_id = '1112222'
        expected = {
            'RecordDetails': {
                'PubMed ID': {'FieldType': 'E', 'Value': '1112222'}},
            'RecordName': f'PUBMED:{pub.pubmed_id}'}
        self.assertDictEqual(expected, literature_to_biolomics(pub))

        pub = Publication()
        pub.doi = 'doi.er/111/12131'
        expected = {
            'RecordDetails': {
                'DOI number': {'FieldType': 'E', 'Value': pub.doi}},
            'RecordName': f'DOI:{pub.doi}'}
        self.assertDictEqual(expected, literature_to_biolomics(pub))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Limit discovery to this module's single test case class.
    import sys

    sys.argv = ['', 'BibliographySerializerTest']
    unittest.main()
|
||||
@ -1,156 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from mirri.biolomics.remote.endoint_names import STRAIN_WS
|
||||
from .utils import VERSION, SERVER_URL, create_full_data_strain
|
||||
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
|
||||
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
|
||||
from mirri.biolomics.pipelines.strain import retrieve_strain_by_accession_number
|
||||
|
||||
|
||||
class BiolomicsStrainClientTest(unittest.TestCase):
    """Integration tests for strain CRUD/search against the Biolomics test server.

    These tests talk to a live web service and require valid credentials in
    ``mirri.biolomics.settings``.
    """

    def setUp(self):
        # A fresh authenticated client per test.
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_strain_by_id(self):
        """A known record id can be fetched and round-trips its id."""
        record_id = 14803
        strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
        self.assertEqual(strain.record_id, record_id)
        print(strain.record_name)

    def test_retrieve_strain_by_name(self):
        """A known record name resolves to the matching record id."""
        record_id = 14803
        record_name = 'MIRRI0014803'
        strain = self.client.retrieve_by_name(STRAIN_WS, record_name)
        self.assertEqual(strain.record_name, record_name)
        self.assertEqual(strain.record_id, record_id)

    def test_search_strain(self):
        """Exact-match search on accession number returns exactly one record."""
        accession_number = "BEA 0014B"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)

        self.assertEqual(search_response['total'], 1)
        self.assertEqual(search_response['records'][0].id.strain_id,
                         accession_number)

    def test_search_strain4(self):
        """Clean-up helper: delete any leftover TESTCC 1 records on the server."""
        accession_number = "TESTCC 1"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)
        for strain in search_response['records']:
            print(strain)
            self.client.delete_by_id(STRAIN_WS, strain.record_id)

    def test_search_strain_no_found(self):
        """Searching a non-existent accession number yields an empty result."""
        accession_number = "BEA 0014B_"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)

        self.assertEqual(search_response['total'], 0)
        self.assertFalse(search_response['records'])

    def test_create_strain(self):
        """Creating a full-data strain preserves its fields; record is removed afterwards."""
        strain = create_full_data_strain()
        strain.taxonomy.interspecific_hybrid = None
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertIsNone(new_strain.taxonomy.interspecific_hybrid)
            self.assertEqual(new_strain.growth.recommended_media, ['AAA'])
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
        finally:
            # Always clean up the server-side record, even on assertion failure.
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_update_strain(self):
        """An updated field (set to None) persists through update and re-retrieval."""
        strain = create_full_data_strain()
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
            self.assertFalse(new_strain.taxonomy.interspecific_hybrid)
            new_strain.id.number = '2'
            new_strain.taxonomy.interspecific_hybrid = None
            updated_strain = self.client.update(STRAIN_WS, new_strain)
            self.assertEqual(updated_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(updated_strain.taxonomy.interspecific_hybrid)

            retrieved_strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
            self.assertEqual(retrieved_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(retrieved_strain.taxonomy.interspecific_hybrid)
        finally:
            if record_id is not None:
                print('deleting')
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_update_strain_pathogenicity(self):
        """Clearing pathogenicity persists through update and re-retrieval."""
        strain = create_full_data_strain()
        print(strain.pathogenicity)
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
            self.assertEqual(new_strain.pathogenicity, 'illness')

            new_strain.pathogenicity = None
            updated_strain = self.client.update(STRAIN_WS, new_strain)
            self.assertEqual(updated_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(updated_strain.pathogenicity)

            retrieved_strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
            self.assertEqual(retrieved_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(retrieved_strain.pathogenicity)
        finally:
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_search_by_accession_number(self):
        """The pipeline helper finds an existing strain by accession number."""
        accession_number = "BEA 0014B"
        strain = retrieve_strain_by_accession_number(self.client, accession_number)
        self.assertEqual(strain.id.strain_id, accession_number)

    # BUG FIX: this method was also named test_search_by_accession_number,
    # which shadowed the positive-lookup test above so it never ran.
    def test_search_by_accession_number_not_found(self):
        """The pipeline helper returns a falsy value for an unknown accession."""
        accession_number = "BEA 0014B_"
        strain = retrieve_strain_by_accession_number(self.client, accession_number)
        self.assertFalse(strain)
||||
class BiolomicsClientGrowthMediaTest(unittest.TestCase):
    """Growth-media retrieval against the Biolomics test server."""

    def setUp(self):
        # A fresh authenticated client per test.
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    # 'xtest_' prefix keeps this test disabled (not discovered by unittest).
    def xtest_growth_media_by_name(self):
        gm = self.client.retrieve('growth_media', 'AAA')
        self.assertEqual(gm['Record Id'], 1)
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the whole module; uncomment below to run a single test.
    # import sys;sys.argv = ['',
    #                        'BiolomicsWriter.test_mirri_excel_parser_invalid']
    unittest.main()
|
||||
@ -1,99 +0,0 @@
|
||||
from mirri.biolomics.serializers.strain import StrainMirri
|
||||
from mirri.entities.strain import StrainId, OrganismType
|
||||
from mirri.entities.sequence import GenomicSequence
|
||||
from mirri.entities.date_range import DateRange
|
||||
from mirri.entities.publication import Publication
|
||||
from mirri.settings import NAGOYA_NO_RESTRICTIONS
|
||||
|
||||
VERSION = 'v2'
|
||||
SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
|
||||
|
||||
|
||||
def create_full_data_strain():
    """Build a StrainMirri fixture with every section populated.

    Used by the integration tests as a representative "full" strain record.
    The growth medium "AAA" must already exist on the test server.

    Returns:
        StrainMirri: a fully populated, in-memory strain (not persisted).
    """
    strain = StrainMirri()

    # Identity.
    strain.id.number = "1"
    strain.id.collection = "TESTCC"
    strain.id.url = "https://cect/2342"

    # Legal / access restrictions.
    strain.restriction_on_use = "no_restriction"
    strain.nagoya_protocol = NAGOYA_NO_RESTRICTIONS
    strain.abs_related_files = ['https://example.com']
    strain.mta_files = ['https://example.com']
    strain.other_numbers.append(StrainId(collection="aaa", number="a"))
    strain.other_numbers.append(StrainId(collection="aaa3", number="a3"))
    strain.is_from_registered_collection = False
    strain.risk_group = '1'
    strain.is_potentially_harmful = True
    strain.is_subject_to_quarantine = False

    # Taxonomy.
    strain.taxonomy.organism_type = [OrganismType(2)]
    strain.taxonomy.genus = 'Escherichia'
    strain.taxonomy.species = 'coli'
    strain.taxonomy.interspecific_hybrid = False
    strain.taxonomy.infrasubspecific_name = 'serovar tete'
    strain.taxonomy.comments = 'lalalalla'

    strain.status = "type of Bacillus alcalophilus"
    strain.history = 'firstplave < seconn place < third place'

    # Deposit.
    strain.deposit.who = "NCTC, National Collection of Type Cultures - NCTC, London, United Kingdom of Great Britain and Northern Ireland."
    strain.deposit.date = DateRange(year=1985, month=5, day=2)
    strain.catalog_inclusion_date = DateRange(year=1985, month=5, day=2)

    # Collection event.
    strain.collect.location.country = "ESP"
    strain.collect.location.state = "una state"
    strain.collect.location.municipality = "one municipality"
    strain.collect.location.longitude = 23.3
    strain.collect.location.latitude = 23.3
    strain.collect.location.altitude = 121
    strain.collect.location.site = "somewhere in the world"
    strain.collect.habitat_ontobiotope = "OBT:000190"
    strain.collect.habitat = 'some habitat'
    strain.collect.who = "the collector"
    strain.collect.date = DateRange(year=1991)

    # Isolation.
    strain.isolation.date = DateRange(year=1900)
    strain.isolation.who = 'the isolator'
    strain.isolation.substrate_host_of_isolation = 'some substrate'

    # already existing media in test_mirri

    # Growth conditions.
    strain.growth.recommended_temp = {'min': 30, 'max': 30}
    strain.growth.recommended_media = ["AAA"]
    strain.growth.tested_temp_range = {'min': 29, 'max': 32}

    strain.form_of_supply = ["Agar", "Lyo"]

    #strain.other_denominations = ["lajdflasjdldj"]

    # Genetics: one genomic marker plus assorted genetic metadata.
    gen_seq = GenomicSequence()
    gen_seq.marker_id = "pepe"
    gen_seq.marker_type = "16S rRNA"
    strain.genetics.markers.append(gen_seq)
    strain.genetics.ploidy = 9
    strain.genetics.genotype = 'some genotupe'
    strain.genetics.gmo = True
    strain.genetics.gmo_construction = 'instructrion to build'
    strain.genetics.mutant_info = 'x-men'
    strain.genetics.sexual_state = 'MT+A'
    strain.genetics.plasmids = ['asda']
    strain.genetics.plasmids_in_collections = ['asdasda']

    # Literature.
    pub = Publication()
    pub.title = "The genus Amylomyces"
    strain.publications = [pub]

    # Applications / characterization.
    strain.plant_pathogenicity_code = 'PATH:001'
    strain.pathogenicity = 'illness'
    strain.enzyme_production = 'some enzimes'
    strain.production_of_metabolites = 'big factory of cheese'
    strain.applications = 'health'

    strain.remarks = 'no remarks for me'
    return strain
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Ad-hoc smoke check: build the fixture and print one field.
    strain = create_full_data_strain()
    print(strain.collect.habitat_ontobiotope)
|
||||
Binary file not shown.
@ -1,5 +0,0 @@
|
||||
{
|
||||
"key1": "value1",
|
||||
"key2": "value2",
|
||||
"key3": "value3"
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,318 +0,0 @@
|
||||
"""
|
||||
Created on 2020(e)ko abe. 2(a)
|
||||
|
||||
@author: peio
|
||||
"""
|
||||
|
||||
import unittest
|
||||
|
||||
from mirri.entities.publication import Publication
|
||||
from mirri.entities.date_range import DateRange
|
||||
from mirri.entities.location import Location
|
||||
from mirri.entities.sequence import GenomicSequence
|
||||
from mirri.entities.strain import (
|
||||
Collect,
|
||||
Deposit,
|
||||
Isolation,
|
||||
ValidationError,
|
||||
OrganismType,
|
||||
Strain,
|
||||
StrainId,
|
||||
Taxonomy,
|
||||
)
|
||||
from mirri.settings import (
|
||||
COLLECT,
|
||||
COUNTRY,
|
||||
DATE_OF_ISOLATION,
|
||||
DEPOSIT,
|
||||
DEPOSITOR,
|
||||
GENETICS,
|
||||
GROWTH,
|
||||
ISOLATED_BY,
|
||||
ISOLATION,
|
||||
LOCATION,
|
||||
MARKERS,
|
||||
NAGOYA_DOCS_AVAILABLE,
|
||||
NAGOYA_PROTOCOL,
|
||||
ORGANISM_TYPE,
|
||||
OTHER_CULTURE_NUMBERS,
|
||||
PLOIDY,
|
||||
RECOMMENDED_GROWTH_MEDIUM,
|
||||
TAXONOMY,
|
||||
DATE_OF_INCLUSION, NO_RESTRICTION
|
||||
)
|
||||
from mirri.validation.entity_validators import validate_strain
|
||||
|
||||
|
||||
class TestDataRange(unittest.TestCase):
    """Parsing, formatting and validation of DateRange."""

    def test_data_range_init(self):
        """An empty range is falsy; strpdate/strfdate round-trip partial dates."""
        dr = DateRange()

        self.assertFalse(dr)

        self.assertEqual(dr.__str__(), "")
        self.assertEqual(dr.range["start"], None)
        self.assertEqual(dr.range["end"], None)

        # A bare year formats with '-' padding for the missing month/day.
        dr.strpdate("2012")
        self.assertEqual(dr.strfdate, "2012----")
        self.assertTrue(dr)

        dr.strpdate("2012----")
        self.assertEqual(dr.strfdate, "2012----")

        dr.strpdate("201212--")
        self.assertEqual(dr.strfdate, "201212--")
        # Month 13 must be rejected both in string and keyword form.
        try:
            dr.strpdate("201213--")
            self.fail()
        except ValueError:
            pass

        try:
            dr = DateRange(year=2012, month=13)
            self.fail()
        except ValueError:
            pass

        dr = DateRange(year=2020)
        self.assertEqual(dr.strfdate, "2020----")

        # A year-only range expands to the first and last day of that year.
        dr2 = dr.strpdate("2012")
        self.assertEqual(dr2.range["start"].year, 2012)
        self.assertEqual(dr2.range["start"].month, 1)
        self.assertEqual(dr2.range["start"].day, 1)

        self.assertEqual(dr2.range["end"].year, 2012)
        self.assertEqual(dr2.range["end"].month, 12)
        self.assertEqual(dr2.range["end"].day, 31)
|
||||
|
||||
|
||||
class TestCollect(unittest.TestCase):
    """Serialization and string rendering of the Collect entity."""

    def test_collect_basic(self):
        """An empty Collect serializes to {}; set fields appear in dict()/str()."""
        collect = Collect()
        self.assertEqual(collect.dict(), {})

        collect.location.country = "ESP"
        collect.date = DateRange().strpdate("2012----")

        collect.who = "pepito"
        self.assertEqual(
            dict(collect.dict()),
            {
                "location": {"countryOfOriginCode": "ESP"},
                "collected_by": "pepito",
                "date_of_collection": "2012----",
            },
        )
        # str() expands the ISO country code to its display name.
        self.assertEqual(collect.__str__(),
                         "Collected: Spain in 2012---- by pepito")
|
||||
|
||||
|
||||
class TestOrganismType(unittest.TestCase):
    """Behaviour of the OrganismType value object."""

    def test_basic_usage(self):
        """Code and name lookups agree; undeclared attributes are rejected."""
        by_code = OrganismType(2)
        self.assertEqual(by_code.name, "Archaea")
        self.assertEqual(by_code.code, 2)

        # Assigning an attribute the class does not declare raises TypeError.
        with self.assertRaises(TypeError):
            by_code.ko = 'a'

        # Construction from the human-readable name is also accepted.
        OrganismType("Archaea")
|
||||
|
||||
class TestTaxonomy(unittest.TestCase):
    """Tests for the Taxonomy entity."""

    def test_taxonomy_basic(self):
        """A fresh Taxonomy is falsy and serializes to an empty dict."""
        empty_taxonomy = Taxonomy()
        self.assertEqual(empty_taxonomy.dict(), {})
        self.assertFalse(empty_taxonomy)

    def test_taxonomy_with_data(self):
        """Genus and species combine into the long (binomial) name."""
        tax = Taxonomy()
        tax.genus = "Bacilus"
        tax.organism_type = [OrganismType("Archaea")]
        tax.species = "vulgaris"
        self.assertEqual(tax.long_name, "Bacilus vulgaris")

        # print(taxonomy.dict())
|
||||
|
||||
class TestLocation(unittest.TestCase):
    """Serialization behaviour of the Location entity."""

    def test_empty_init(self):
        """A fresh Location is falsy and serializes to an empty dict."""
        location = Location()
        self.assertEqual(location.dict(), {})
        self.assertFalse(location)

    def test_add_data(self):
        """Only non-None fields appear in the serialized dict."""
        location = Location()
        location.country = "esp"
        self.assertEqual(location.dict(), {COUNTRY: "esp"})
        # Assigning None must not add a key.
        location.state = None
        self.assertEqual(location.dict(), {COUNTRY: "esp"})
|
||||
|
||||
class TestStrain(unittest.TestCase):
    """Construction, field validation and serialization of Strain."""

    def test_empty_strain(self):
        """A fresh Strain serializes to an empty dict."""
        strain = Strain()
        self.assertEqual(strain.dict(), {})

    def test_strain_add_data(self):
        """Setting each section populates the corresponding key in dict()."""
        strain = Strain()

        strain.id.number = "5433"
        strain.id.collection = "CECT"
        strain.id.url = "https://cect/2342"

        # An arbitrary string is not a valid Nagoya protocol value.
        try:
            strain.nagoya_protocol = "asdas"
            self.fail()
        except ValidationError:
            pass

        strain.nagoya_protocol = NAGOYA_DOCS_AVAILABLE
        strain.dict()[NAGOYA_PROTOCOL] = NAGOYA_DOCS_AVAILABLE

        strain.collect.location.country = "ESP"

        self.assertEqual(strain.dict()[COLLECT][LOCATION][COUNTRY], "ESP")

        strain.genetics.ploidy = 9
        self.assertEqual(strain.dict()[GENETICS][PLOIDY], 9)

        strain.growth.recommended_media = ["asd"]
        strain.isolation.date = DateRange(year=1900)
        self.assertEqual(strain.dict()[ISOLATION]
                         [DATE_OF_ISOLATION], "1900----")

        strain.deposit.who = "pepe"
        self.assertEqual(strain.dict()[DEPOSIT][DEPOSITOR], "pepe")

        strain.growth.recommended_media = ["11"]
        self.assertEqual(strain.dict()[GROWTH]
                         [RECOMMENDED_GROWTH_MEDIUM], ["11"])

        # Organism type accepts either the numeric code or the name.
        strain.taxonomy.organism_type = [OrganismType(2)]
        self.assertEqual(
            strain.dict()[TAXONOMY][ORGANISM_TYPE], [
                {"code": 2, "name": "Archaea"}]
        )

        strain.taxonomy.organism_type = [OrganismType("Algae")]
        self.assertEqual(
            strain.dict()[TAXONOMY][ORGANISM_TYPE], [
                {"code": 1, "name": "Algae"}]
        )

        strain.other_numbers.append(StrainId(collection="aaa", number="a"))
        strain.other_numbers.append(StrainId(collection="aaa3", number="a3"))
        self.assertEqual(
            strain.dict()[OTHER_CULTURE_NUMBERS],
            [
                {"collection_code": "aaa", "accession_number": "a"},
                {"collection_code": "aaa3", "accession_number": "a3"},
            ],
        )
        strain.form_of_supply = ["Agar", "Lyo"]
        gen_seq = GenomicSequence()
        self.assertEqual(gen_seq.dict(), {})
        gen_seq.marker_id = "pepe"
        gen_seq.marker_type = "16S rRNA"
        strain.genetics.markers.append(gen_seq)
        self.assertEqual(
            strain.dict()[GENETICS][MARKERS],
            [{"marker_type": "16S rRNA", "INSDC": "pepe"}],
        )

        # Ontobiotope ids must match the OBT:NNNNNN pattern (6 digits).
        strain.collect.habitat_ontobiotope = "OBT:111111"
        self.assertEqual(strain.collect.habitat_ontobiotope, "OBT:111111")

        try:
            strain.collect.habitat_ontobiotope = "OBT:11111"
            self.fail()
        except ValidationError:
            pass

        # publications
        # Must be a list of Publication instances; anything else is rejected.
        try:
            strain.publications = 1
            self.fail()
        except ValidationError:
            pass
        pub = Publication()
        pub.id = "1"
        try:
            strain.publications = pub
            self.fail()
        except ValidationError:
            pass

        strain.publications = [pub]
        self.assertEqual(strain.publications[0].id, "1")

        strain.catalog_inclusion_date = DateRange(year=1992)
        self.assertEqual(strain.dict()[DATE_OF_INCLUSION], '1992----')

        import pprint

        pprint.pprint(strain.dict())

    def test_strain_validation(self):
        strain = Strain()
        strain.form_of_supply = ['Lyo']

        # NOTE(review): this early return disables everything below — the
        # validation assertions never run. Presumably a temporary measure;
        # confirm whether validate_strain's expectations still hold and
        # remove the return.
        return

        errors = validate_strain(strain)
        self.assertEqual(len(errors), 10)

        strain.id.collection = 'test'
        strain.id.number = '1'

        errors = validate_strain(strain)
        self.assertEqual(len(errors), 9)

        strain.nagoya_protocol = NAGOYA_DOCS_AVAILABLE
        strain.restriction_on_use = NO_RESTRICTION
        strain.risk_group = 1
        strain.taxonomy.organism_type = [OrganismType(4)]
        strain.taxonomy.hybrids = ['Sac lac', 'Sac lcac3']
        strain.growth.recommended_media = ['aa']
        strain.growth.recommended_temp = {'min': 2, 'max':5}
        strain.form_of_supply = ['lyo']
        strain.collect.location.country = 'ESP'
        errors = validate_strain(strain)
        self.assertFalse(errors)
|
||||
|
||||
|
||||
class TestIsolation(unittest.TestCase):
    """Serialization behaviour of the Isolation entity."""

    def test_iniatialize_isollation(self):
        """Fields appear in dict() once set; Isolation has no location."""
        isolation = Isolation()
        self.assertEqual(isolation.dict(), {})
        isolation.who = "pepito"
        self.assertTrue(ISOLATED_BY in isolation.dict())
        isolation.date = DateRange().strpdate("2012----")
        self.assertTrue(DATE_OF_ISOLATION in isolation.dict())

        # Unlike Collect, Isolation exposes no location attribute.
        try:
            isolation.location.site = "spain"
            self.fail()
        except (ValueError, AttributeError):
            pass
|
||||
|
||||
|
||||
class TestGenomicSequence(unittest.TestCase):
    """Serialization behaviour of GenomicSequence."""

    def test_empty_init(self):
        """Empty sequence serializes to {}; set markers map to INSDC keys."""
        sequence = GenomicSequence()
        self.assertEqual(sequence.dict(), {})

        sequence.marker_id = "pepe"
        sequence.marker_type = "16S rRNA"
        expected = {"marker_type": "16S rRNA", "INSDC": "pepe"}
        self.assertEqual(sequence.dict(), expected)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the whole module; uncomment below to run a single test case.
    # import sys;sys.argv = ['', 'TestStrain']
    unittest.main()
|
||||
@ -1,51 +0,0 @@
|
||||
from mirri.entities.strain import ValidationError
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from pprint import pprint
|
||||
from mirri.io.parsers.mirri_excel import parse_mirri_excel
|
||||
|
||||
TEST_DATA_DIR = Path(__file__).parent / "data"
|
||||
|
||||
|
||||
class MirriExcelTests(unittest.TestCase):
    """Parsing of MIRRI-specification excel workbooks."""

    def test_mirri_excel_parser(self):
        """A valid workbook yields growth media and fully populated strains."""
        in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            parsed_data = parse_mirri_excel(fhand, version="20200601")

        medium = parsed_data["growth_media"][0]
        self.assertEqual("1", medium.acronym)
        self.assertEqual(medium.description, "NUTRIENT BROTH/AGAR I")

        strains = list(parsed_data["strains"])
        strain = strains[0]
        self.assertEqual(strain.publications[0].id, 1)
        self.assertEqual(strain.publications[0].title, 'Cosa')
        self.assertEqual(strain.id.number, "1")
        pprint(strain.dict())

    # 'xtest_' prefix keeps this test disabled (not discovered by unittest).
    def xtest_mirri_excel_parser_invalid_fail(self):
        in_path = TEST_DATA_DIR / "invalid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            try:
                parse_mirri_excel(fhand, version="20200601")
                self.fail()
            except ValidationError:
                pass

    # Disabled: dumps parse errors of an invalid workbook for inspection.
    def xtest_mirri_excel_parser_invalid(self):
        in_path = TEST_DATA_DIR / "invalid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            parsed_data = parse_mirri_excel(
                fhand, version="20200601")

        errors = parsed_data["errors"]
        for _id, _errors in errors.items():
            print(_id, _errors)
||||
if __name__ == "__main__":
    # Run the whole module; uncomment below to run a single test.
    # import sys;sys.argv = ['',
    #                        'MirriExcelTests.test_mirri_excel_parser_invalid']
    unittest.main()
|
||||
@ -1,589 +0,0 @@
|
||||
from datetime import datetime
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from itertools import chain
|
||||
|
||||
from mirri.validation.tags import (
|
||||
CHOICES,
|
||||
COORDINATES,
|
||||
CROSSREF,
|
||||
CROSSREF_NAME,
|
||||
DATE,
|
||||
MATCH,
|
||||
MISSING,
|
||||
MULTIPLE,
|
||||
NUMBER,
|
||||
REGEXP,
|
||||
SEPARATOR,
|
||||
TAXON,
|
||||
TYPE,
|
||||
UNIQUE,
|
||||
VALUES
|
||||
)
|
||||
|
||||
from mirri.validation.excel_validator import (
|
||||
is_valid_choices,
|
||||
is_valid_coords,
|
||||
is_valid_crossrefs,
|
||||
is_valid_date,
|
||||
is_valid_missing,
|
||||
is_valid_number,
|
||||
is_valid_regex,
|
||||
is_valid_taxon,
|
||||
is_valid_unique,
|
||||
is_valid_file,
|
||||
validate_mirri_excel,
|
||||
)
|
||||
|
||||
|
||||
TEST_DATA_DIR = Path(__file__).parent / "data"
|
||||
TS_VALUE = "value"
|
||||
TS_CONF = "conf"
|
||||
TS_ASSERT = "assert_func"
|
||||
|
||||
|
||||
class MirriExcelValidationTests(unittest.TestCase):
    """End-to-end validation of MIRRI excel workbooks via validate_mirri_excel."""

    @staticmethod
    def _collect_errors(error_log):
        """Flatten an error log into parallel (entities, error codes) lists.

        Extracted helper: the same loop was previously duplicated in both
        error-inspecting tests below.
        """
        entities = []
        err_codes = []
        for ett, errors in error_log.get_errors().items():
            entities.append(ett)
            err_codes.extend([err.code for err in errors])
        return entities, err_codes

    def test_validation_structure(self):
        """A structurally broken workbook reports structural error codes."""
        in_path = TEST_DATA_DIR / "invalid_structure.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        entities, err_codes = self._collect_errors(error_log)

        self.assertIn("EFS", entities)
        self.assertIn("STD", entities)
        self.assertIn("GOD", entities)
        self.assertIn("GMD", entities)

        self.assertIn("EFS03", err_codes)
        self.assertIn("EFS06", err_codes)
        self.assertIn("EFS08", err_codes)
        self.assertIn("GOD06", err_codes)
        self.assertIn("GMD01", err_codes)
        self.assertIn("STD05", err_codes)
        self.assertIn("STD08", err_codes)
        self.assertIn("STD12", err_codes)

    def test_validation_content(self):
        """A structurally valid but content-broken workbook reports content codes."""
        in_path = TEST_DATA_DIR / "invalid_content.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        entities, err_codes = self._collect_errors(error_log)

        self.assertTrue(len(err_codes) > 0)

        # Structure ("EFS") errors must be absent when only content is wrong.
        self.assertNotIn("EFS", entities)
        self.assertIn("STD", entities)
        self.assertIn("GOD", entities)
        self.assertIn("GID", entities)

        self.assertIn("GOD04", err_codes)
        self.assertIn("GOD07", err_codes)
        self.assertIn("GID03", err_codes)
        self.assertIn("STD11", err_codes)
        self.assertIn("STD15", err_codes)
        self.assertIn("STD22", err_codes)
        self.assertIn("STD04", err_codes)
        self.assertIn("STD10", err_codes)
        self.assertIn("STD07", err_codes)
        self.assertIn("STD14", err_codes)
        self.assertIn("STD16", err_codes)

    def test_validation_valid(self):
        """A valid workbook produces no errors at all."""
        in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        # assertEqual gives a clearer failure message than assertTrue(len==0).
        self.assertEqual(len(error_log.get_errors()), 0)
||||
|
||||
|
||||
class ValidatoionFunctionsTest(unittest.TestCase):
|
||||
|
||||
def test_is_valid_regex(self):
|
||||
tests = [
|
||||
{
|
||||
TS_VALUE: "abcDEF",
|
||||
TS_CONF: {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: "123456",
|
||||
TS_CONF: {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
{
|
||||
TS_VALUE: "123456",
|
||||
TS_CONF: {TYPE: REGEXP, MATCH: r"\d+"},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: "abcdef",
|
||||
TS_CONF: {TYPE: REGEXP, MATCH: r"\d+"},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
{
|
||||
TS_VALUE: "abc 123",
|
||||
TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: "123 abc",
|
||||
TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: "123 ",
|
||||
TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
value = test[TS_VALUE]
|
||||
conf = test[TS_CONF]
|
||||
assert_func = test[TS_ASSERT]
|
||||
with self.subTest(value=value):
|
||||
assert_func(is_valid_regex(value, conf))
|
||||
|
||||
def test_is_valid_choices(self):
|
||||
tests = [
|
||||
{
|
||||
TS_VALUE: "1",
|
||||
TS_CONF: {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: "1, 3",
|
||||
TS_CONF: {
|
||||
TYPE: CHOICES,
|
||||
VALUES: ["1", "2", "3", "4"],
|
||||
MULTIPLE: True,
|
||||
SEPARATOR: ","
|
||||
},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: "5",
|
||||
TS_CONF: {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
value = test[TS_VALUE]
|
||||
conf = test[TS_CONF]
|
||||
assert_func = test[TS_ASSERT]
|
||||
with self.subTest(value=value):
|
||||
assert_func(is_valid_choices(value, conf))
|
||||
|
||||
def test_is_valid_crossref(self):
|
||||
tests = [
|
||||
{
|
||||
TS_VALUE: "abc",
|
||||
TS_CONF: {
|
||||
TYPE: CROSSREF,
|
||||
CROSSREF_NAME: "values",
|
||||
"crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
|
||||
},
|
||||
TS_ASSERT: self.assertTrue,
|
||||
},
|
||||
{
|
||||
TS_VALUE: "123",
|
||||
TS_CONF: {
|
||||
TYPE: CROSSREF,
|
||||
CROSSREF_NAME: "values",
|
||||
"crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
|
||||
},
|
||||
TS_ASSERT: self.assertFalse,
|
||||
},
|
||||
{
|
||||
TS_VALUE: "abc, def",
|
||||
TS_CONF: {
|
||||
TYPE: CROSSREF,
|
||||
CROSSREF_NAME: "values",
|
||||
"crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
|
||||
MULTIPLE: True,
|
||||
SEPARATOR: ",",
|
||||
},
|
||||
TS_ASSERT: self.assertTrue,
|
||||
},
|
||||
{
|
||||
TS_VALUE: "abc, 123",
|
||||
TS_CONF: {
|
||||
TYPE: CROSSREF,
|
||||
CROSSREF_NAME: "values",
|
||||
"crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
|
||||
MULTIPLE: True,
|
||||
SEPARATOR: ",",
|
||||
},
|
||||
TS_ASSERT: self.assertFalse,
|
||||
},
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
value = test[TS_VALUE]
|
||||
conf = test[TS_CONF]
|
||||
assert_func = test[TS_ASSERT]
|
||||
with self.subTest(value=value):
|
||||
assert_func(is_valid_crossrefs(value, conf))
|
||||
|
||||
def test_is_valid_missing(self):
|
||||
tests = [
|
||||
{
|
||||
TS_VALUE: 1,
|
||||
TS_CONF: {TYPE: MISSING},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: "abc",
|
||||
TS_CONF: {TYPE: MISSING},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: None,
|
||||
TS_CONF: {TYPE: MISSING},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
value = test[TS_VALUE]
|
||||
conf = test[TS_CONF]
|
||||
assert_func = test[TS_ASSERT]
|
||||
with self.subTest(value=value):
|
||||
assert_func(is_valid_missing(value, conf))
|
||||
|
||||
def test_is_valid_date(self):
|
||||
tests = [
|
||||
{
|
||||
TS_VALUE: '2020-04-07',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: '2020/04/07',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: datetime(2021, 5, 1),
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: '2020-05',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: '2020/05',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: 2020,
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertTrue
|
||||
},
|
||||
{
|
||||
TS_VALUE: '2021 05 01',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
{
|
||||
TS_VALUE: '04-07-2020',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
{
|
||||
TS_VALUE: '2021-02-31',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
{
|
||||
TS_VALUE: '2021-15',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
{
|
||||
TS_VALUE: '15-2021',
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
{
|
||||
TS_VALUE: 3000,
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
{
|
||||
TS_VALUE: -2020,
|
||||
TS_CONF: {TYPE: DATE},
|
||||
TS_ASSERT: self.assertFalse
|
||||
},
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
value = test[TS_VALUE]
|
||||
conf = test[TS_CONF]
|
||||
assert_func = test[TS_ASSERT]
|
||||
with self.subTest(value=value):
|
||||
assert_func(is_valid_date(value, conf))
|
||||
|
||||
def test_is_valid_coordinates(self):
    """'lat; lon[; extra]' strings validate within [-90, 90] x [-180, 180]."""
    cases = [
        # (value, expected validity)
        ("23; 50", True),
        ("-90; -100", True),
        ("90; 100", True),
        ("0; 0", True),
        ("10; 20; 5", True),     # optional third component accepted
        ("10; 20; -5", True),
        ("91; 50", False),       # latitude out of range
        ("87; 182", False),      # longitude out of range
        ("-200; 182", False),
        ("20, 40", False),       # wrong separator
        ("abc def", False),      # non-numeric
        (123, False),            # not a string
    ]
    for value, expected in cases:
        check = self.assertTrue if expected else self.assertFalse
        with self.subTest(value=value):
            check(is_valid_coords(value, {TYPE: COORDINATES}))
||||
def test_is_valid_number(self):
    """Numbers and numeric strings validate; optional min/max bounds are enforced."""
    cases = [
        # (value, conf, expected validity)
        (1, {TYPE: NUMBER}, True),
        (2.5, {TYPE: NUMBER}, True),
        ("10", {TYPE: NUMBER}, True),
        ("10.5", {TYPE: NUMBER}, True),
        (5, {TYPE: NUMBER, "min": 0}, True),
        (5, {TYPE: NUMBER, "max": 10}, True),
        (5, {TYPE: NUMBER, "min": 0, "max": 10}, True),
        ("hello", {TYPE: NUMBER}, False),
        (10, {TYPE: NUMBER, "max": 5}, False),   # above upper bound
        (0, {TYPE: NUMBER, "min": 5}, False),    # below lower bound
    ]
    for value, conf, expected in cases:
        check = self.assertTrue if expected else self.assertFalse
        with self.subTest(value=value):
            check(is_valid_number(value, conf))
||||
def test_is_valid_taxon(self):
    """Taxon strings with recognized rank markers (sp./spp/subsp./var.) validate."""
    cases = [
        # (value, expected validity)
        ('sp. species', True),
        ('spp species subsp. subspecies', True),
        ('spp species subsp. subspecies var. variety', True),
        ('spp taxon', True),
        ('Candidaceae', True),               # bare family-style name accepted
        ('sp sp species', False),            # repeated rank marker rejected
        ('spp species abc. def', False),     # unknown rank marker rejected
    ]
    for value, expected in cases:
        check = self.assertTrue if expected else self.assertFalse
        with self.subTest(value=value):
            check(is_valid_taxon(value, {TYPE: TAXON}))
||||
def test_is_valid_unique(self):
    """A value already recorded under conf['label'] in shown_values is rejected."""
    # Each case gets its own conf dict: is_valid_unique may record the value
    # into shown_values, so the dicts must not be shared between cases.
    cases = [
        ("abc",
         {TYPE: UNIQUE, "label": "values", "shown_values": {}},
         self.assertTrue),
        ("jkl",
         {TYPE: UNIQUE, "label": "values",
          "shown_values": {"values": {"abc": '', "def": '', "ghi": ''}}},
         self.assertTrue),
        ("abc",  # duplicate of an already-seen value
         {TYPE: UNIQUE, "label": "values",
          "shown_values": {"values": {"abc": '', "def": '', "ghi": ''}}},
         self.assertFalse),
    ]
    for value, conf, check in cases:
        with self.subTest(value=value):
            check(is_valid_unique(value, conf))
||||
def test_is_valid_file(self):
    """The .xlsx fixture validates; the .json fixture does not."""
    cases = [
        (TEST_DATA_DIR / "invalid_structure.mirri.xlsx", self.assertTrue),
        (TEST_DATA_DIR / "invalid_excel.mirri.json", self.assertFalse),
    ]
    for value, check in cases:
        with self.subTest(value=value):
            check(is_valid_file(value))
|
||||
|
||||
if __name__ == "__main__":
    # Run this module's unittest suite. To run a single test, pass its id on
    # the command line, e.g.:
    #   python this_module.py ValidatoionFunctionsTest.test_is_valid_regex
    # (Removed an unused `import sys` and the dead commented-out argv hack.)
    unittest.main()
||||
@ -1,24 +0,0 @@
|
||||
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from mirri.io.writers.mirri_excel import write_mirri_excel
|
||||
from mirri.io.parsers.mirri_excel import parse_mirri_excel
|
||||
|
||||
TEST_DATA_DIR = Path(__file__).parent / "data"
|
||||
|
||||
|
||||
class MirriExcelTests(unittest.TestCase):
    """Round-trip smoke test: parse a known-good MIRRI excel and rewrite it."""

    def test_valid_excel(self):
        # Parse the reference workbook shipped with the test data.
        source = TEST_DATA_DIR / "valid.mirri.full.xlsx"
        parsed = parse_mirri_excel(source.open('rb'), version="20200601")

        # Write the parsed content back out; parsing errors or writer
        # regressions surface as exceptions here.
        destination = Path("/tmp/test.xlsx")
        write_mirri_excel(destination, parsed["strains"],
                          parsed["growth_media"], version="20200601")
||||
|
||||
if __name__ == "__main__":
    # Run the module's tests; a single test id can be supplied on the
    # command line to narrow the run.
    unittest.main()
|
||||
BIN
validation/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
validation/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/excel_validator.cpython-311.pyc
Normal file
BIN
validation/__pycache__/excel_validator.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/tags.cpython-311.pyc
Normal file
BIN
validation/__pycache__/tags.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/validate_v5.cpython-311.pyc
Normal file
BIN
validation/__pycache__/validate_v5.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/validation_conf_12052023.cpython-311.pyc
Normal file
BIN
validation/__pycache__/validation_conf_12052023.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/validation_conf_20200601.cpython-311.pyc
Normal file
BIN
validation/__pycache__/validation_conf_20200601.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/validation_conf_20200602.cpython-311.pyc
Normal file
BIN
validation/__pycache__/validation_conf_20200602.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/validation_conf_20230224.cpython-311.pyc
Normal file
BIN
validation/__pycache__/validation_conf_20230224.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/validation_conf_20230324.cpython-311.pyc
Normal file
BIN
validation/__pycache__/validation_conf_20230324.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/__pycache__/version_config.cpython-311.pyc
Normal file
BIN
validation/__pycache__/version_config.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/error_logging/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
validation/error_logging/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/error_logging/__pycache__/error.cpython-311.pyc
Normal file
BIN
validation/error_logging/__pycache__/error.cpython-311.pyc
Normal file
Binary file not shown.
BIN
validation/error_logging/__pycache__/error_log.cpython-311.pyc
Normal file
BIN
validation/error_logging/__pycache__/error_log.cpython-311.pyc
Normal file
Binary file not shown.
Binary file not shown.
@ -62,6 +62,10 @@ class Entity():
|
||||
def GID(self) -> str:
    """Display label for the Genomic Information entity."""
    label = 'Genomic Information'
    return label
|
||||
|
||||
|
||||
def VRS(self) -> str:
    """Display label for the Version entity."""
    label = 'Version'
    return label
|
||||
|
||||
def OTD(self) -> str:
    """Display label for the Ontobiotope entity."""
    label = 'Ontobiotope'
    return label
|
||||
|
||||
547
validation/error_logging/error_message.py
Normal file
547
validation/error_logging/error_message.py
Normal file
@ -0,0 +1,547 @@
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class ErrorMessage():
    """Registry of validation error messages, dispatched by error code.

    Every upper-case method (e.g. ``GMD01``) implements one error code and
    returns its human-readable message; the :pyattr:`message` property
    dispatches on :pyattr:`code`.

    Args:
        code (str): Error code (case-insensitive; normalized to upper case).
        pk (str, optional): The instance's primary key that triggered the error. Defaults to None.
        value (str, optional): The instance's value that triggered the error. Defaults to None.
    """

    def __init__(self, code: str, pk: Optional[str] = None, value: Optional[str] = None):
        # The ``code`` setter already upper-cases; no need to do it twice.
        self.code = code
        self.pk = pk
        self.value = value

    @property
    def _codes(self) -> list:
        # All upper-case callable attributes are error-code handlers.
        # ``isupper()`` is tested first so getattr never evaluates the
        # lower-case properties (which would recurse through ``message``).
        return [
            func
            for func in dir(self)
            if func.isupper() and
            callable(getattr(self, func)) and
            not func.startswith("__")
        ]

    @property
    def _messages(self) -> dict:
        # Map each known code to its bound handler.
        return {code: getattr(self, code) for code in self._codes}

    @property
    def message(self) -> str:
        """Render the message for ``self.code``.

        Raises:
            ValueError: if ``self.code`` is not a known error code.
        """
        if not self._validate_code():
            raise ValueError(f"{self.code} not found")
        return self._messages[self.code]()

    @property
    def code(self) -> str:
        return self._code

    @code.setter
    def code(self, code: str) -> None:
        # Codes are stored upper-case so lookup is case-insensitive.
        self._code = code.upper()

    def _validate_code(self) -> bool:
        return self.code in self._codes

    @property
    def pk(self) -> Optional[str]:
        return self._pk

    @pk.setter
    def pk(self, pk: Optional[str]) -> None:
        self._pk = pk

    @property
    def value(self) -> Optional[str]:
        return self._value

    @value.setter
    def value(self, value: Optional[str]) -> None:
        self._value = value

    # ------------------------------------------------------------------
    # Excel File Structure Error Codes
    # ------------------------------------------------------------------

    def EXL00(self):
        return f"The provided file '{self.pk}' is not an excel(xlsx) file"

    def EFS01(self):
        return "The 'Growth media' sheet is missing. Please check the provided excel template."

    def EFS02(self):
        return "The 'Geographic origin' sheet is missing. Please check the provided excel template."

    def EFS03(self):
        return "The 'Literature' sheet is missing. Please check the provided excel template."

    def EFS04(self):
        return "The 'Sexual state' sheet is missing. Please check the provided excel template."

    def EFS05(self):
        return "The 'Strains' sheet is missing. Please check the provided excel template."

    def EFS06(self):
        return "The 'Ontobiotope' sheet is missing. Please check the provided excel template."

    def EFS07(self):
        return "The 'Markers' sheet is missing. Please check the provided excel template."

    def EFS08(self):
        return "The 'Genomic information' sheet is missing. Please check the provided excel template."

    def EFS09(self):
        return "The 'Version' sheet is missing. Please check the provided excel template."

    # ------------------------------------------------------------------
    # Growth Media Error Codes
    # ------------------------------------------------------------------

    def GMD01(self):
        return "The 'Acronym' column is a mandatory field in the Growth Media sheet."

    def GMD02(self):
        return "The 'Acronym' column is empty or has missing values."

    def GMD03(self):
        return "The 'Description' column is a mandatory field in the Growth Media sheet. The column can not be empty."

    def GMD04(self):
        return f"The 'Description' for growth media with Acronym {self.pk} is missing."

    # ------------------------------------------------------------------
    # Geographic Origin Error Codes
    # ------------------------------------------------------------------

    def GOD01(self):
        return "The 'ID' column is a mandatory field in the Geographic Origin sheet."

    def GOD02(self):
        return "The 'ID' column is empty or has missing values."

    def GOD03(self):
        return "The 'Country' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."

    def GOD04(self):
        return f"The 'Country' for geographic origin with ID {self.pk} is missing."

    def GOD05(self):
        return f"The 'Country' for geographic origin with ID {self.pk} is incorrect."

    def GOD06(self):
        return "The 'Locality' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."

    def GOD07(self):
        return f"The 'Locality' for geographic origin with ID {self.pk} is missing."

    # ------------------------------------------------------------------
    # Literature Error Codes
    # (LID04, LID06, LID08, LID10, LID19 and LID21 are reserved but
    # currently unused.)
    # ------------------------------------------------------------------

    def LID01(self):
        return "The 'ID' column is a mandatory field in the Literature sheet."

    def LID02(self):
        return "The 'ID' column is empty or has missing values."

    def LID03(self):
        return "The 'Full reference' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID05(self):
        return "The 'Authors' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID07(self):
        return "The 'Title' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID09(self):
        return "The 'Journal' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID11(self):
        return "The 'Year' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID12(self):
        return f"The 'Year' for literature with ID {self.pk} is missing."

    def LID13(self):
        return "The 'Volume' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID14(self):
        return f"The 'Volume' for literature with ID {self.pk} is missing."

    def LID15(self):
        return "The 'First page' column is a mandatory field. The column can not be empty."

    def LID16(self):
        return f"The 'First page' for literature with ID {self.pk} is missing."

    def LID17(self):
        # Bug fix: this used to return a tuple of strings, but every other
        # handler (and the ``message`` property contract) returns a str.
        return ("There are four types of ways to fill in the 'Literature' sheet. "
                "1st - Columns 'ID' and 'DOI' are mandatory. "
                "2nd - Columns 'ID' and 'PMID' are mandatory. "
                "3rd - Columns 'ID' and 'Full reference' are mandatory. "
                "In the alternative of these three types of forms not being filled in, we have: "
                "4th - Columns 'ID', 'Authors', 'Title', 'Journal', 'Year', 'Volume', 'First page'.")

    def LID18(self):
        return "The 'PMID' column is a mandatory field. The column can not be empty."

    def LID20(self):
        return "The 'DOI' column is a mandatory field. The column can not be empty."

    # ------------------------------------------------------------------
    # Strains Error Codes
    # ------------------------------------------------------------------

    def STD01(self):
        return "The 'accessionNumber' column is a mandatory field in the Strains sheet."

    def STD02(self):
        return "The 'accessionNumber' column is empty or has missing values."

    def STD03(self):
        return f"The 'accessionNumber' must be unique. The '{self.value}' is repeated."

    def STD04(self):
        return (f"The 'accessionNumber' {self.pk} is not according to the specification."
                " The value must be of the format '<Sequence of characters> <sequence of characters>'.")

    def STD05(self):
        return "The 'useRestrictions' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD06(self):
        return f"The 'useRestrictions' for strain with accessionNumber {self.pk} is missing."

    def STD07(self):
        return (f"The 'useRestrictions' for strain with accessionNumber {self.pk} is not according to the specification."
                f" Your value is {self.value} and the accepted values are 1, 2, 3.")

    def STD08(self):
        return "The 'nagoyaConditions' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD09(self):
        return f"The 'nagoyaConditions' for strain with accessionNumber {self.pk} is missing."

    def STD10(self):
        return (f"The 'nagoyaConditions' for strain with accessionNumber {self.pk} is not according to the specification."
                f" Your value is {self.value} and the accepted values are 1, 2, 3.")

    def STD11(self):
        return (f"The 'registeredCollection' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2, 3.")

    def STD12(self):
        return "The 'riskGroup' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD13(self):
        return f"The 'riskGroup' for strain with accessionNumber {self.pk} is missing."

    def STD14(self):
        return (f"The 'riskGroup' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2, 3, 4.")

    def STD15(self):
        return (f"The 'dualUse' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2.")

    def STD16(self):
        # Normalized mis-encoded curly quotes around the field name.
        return (f"The 'euQuarantine' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2.")

    def STD17(self):
        return "The 'organismType' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD18(self):
        return f"The 'organismType' for strain with accessionNumber {self.pk} is missing."

    def STD19(self):
        return (f"The 'organismType' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 'Algae', 'Archaea', 'Bacteria', 'Cyanobacteria', "
                "'Filamentous Fungi', 'Phage', 'Plasmid', 'Virus', 'Yeast', 1, 2, 3, 4, 5, 6, 7, 8, 9.")

    def STD20(self):
        return "The 'speciesName' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD21(self):
        return f"The 'speciesName' for strain with accessionNumber {self.pk} is missing."

    def STD22(self):
        return f"The 'speciesName' for strain with accessionNumber {self.pk} is incorrect."

    def STD23(self):
        return (f"The 'hybrid' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2.")

    def STD24(self):
        # Bug fix: the original concatenated fragments with no separating
        # spaces ("followed bythe month", etc.).
        return (f"The 'depositHistory' for strain with accessionNumber {self.pk} is incorrect."
                " The field includes entries separated by '<' meaning 'received from'."
                " Entries may include persons or CCs. The name of the CC should be followed by"
                " the month, when available, and year of the acquisition. Between parentheses,"
                " the strain designation or CC numbers and/or a name can also be entered when"
                " a name change has occurred.")

    def STD25(self):
        return (f"The 'depositDate' for strain with accessionNumber {self.pk} is incorrect."
                " The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")

    def STD26(self):
        return (f"The 'accessionDate' for strain with accessionNumber {self.pk} is incorrect."
                " The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")

    def STD27(self):
        return (f"The 'collectionDate' for strain with accessionNumber {self.pk} is incorrect."
                " The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")

    def STD28(self):
        return (f"The 'isolationDate' for strain with accessionNumber {self.pk} is incorrect."
                " The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")

    def STD29(self):
        return (f"The 'temperatureGrowthRange' for strain with accessionNumber {self.pk} is incorrect."
                " It must have two decimal numbers separated by ','")

    def STD30(self):
        return "The 'temperatureGrowthRange' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD31(self):
        return f"The 'temperatureGrowthRange' for strain with accessionNumber {self.pk} is missing."

    def STD32(self):
        return (f"The 'temperatureGrowthRange' for strain with accessionNumber {self.pk} is incorrect."
                " It must have two decimal numbers separated by ','.")

    def STD33(self):
        return "The 'recommendedTemperature' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD34(self):
        return f"The 'recommendedTemperature' for strain with accessionNumber {self.pk} is missing."

    def STD35(self):
        return f"The value of 'recommendedTemperature' for strain with accessionNumber {self.pk} is not in the Growth Media Sheet."

    def STD36(self):
        return "The 'supplyForms' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD37(self):
        return f"The 'supplyForms' for strain with accessionNumber {self.pk} is missing."

    def STD38(self):
        return f"The value of 'supplyForms' for strain with accessionNumber {self.pk} is not in the Forms of Supply Sheet."

    def STD39(self):
        # Bug fix: fragments joined without spaces and a stray mid-sentence
        # full stop ("must be.between [-90, 90]").
        return (f"The 'geographicCoordinates' column for strain with accessionNumber {self.pk} is incorrect."
                " The allowed formats are two, three or four decimal numbers separated by ','."
                " Moreover, the first number must be between [-90, 90], the second between"
                " [-180, 180], and the third and fourth refer to the precision and altitude,"
                " defined by decimal numbers. Put a question mark for lack of precision or"
                " altitude when one of them is missing. Leave the values blank when both are missing.")

    def STD40(self):
        return (f"The 'country' column for strain with accessionNumber {self.pk} is incorrect."
                " The allowed formats are one decimal number between [-200, 8000].")

    def STD54(self):
        return "The 'country' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD55(self):
        return f"The 'country' for strain with accessionNumber {self.pk} is missing."

    def STD41(self):
        return f"The value of 'ontobiotopeTerms' for strain with accessionNumber {self.pk} is not in the Ontobiotope Sheet."

    def STD42(self):
        return (f"The 'gmo' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2")

    def STD43(self):
        return (f"The 'sexualState' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 'Mata', 'Matalpha', 'Mata/Matalpha', "
                "'Matb', 'Mata/Matb', 'MTLa', 'MTLalpha', 'MTLa/MTLalpha', 'MAT1-1', 'MAT1-2', 'MAT1', 'MAT2', 'MT+', 'MT-'")

    def STD44(self):
        return (f"The 'ploidy' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 0, 1, 2, 3, 4, 9")

    def STD45(self):
        # Typos fixed: "If the those", "messsage", and subject/verb agreement.
        msg = (f"At least one of the values '{self.value}' of the literature field"
               f" for strain {self.pk} is not in the literature sheet. ")
        msg += "If those values are Pubmed ids or DOIs, please ignore this message"
        return msg

    def STD46(self):
        return (f"The 'geographicOrigin' for strain with accessionNumber {self.pk} is not according to specification."
                " The 'geographicOrigin' column must consist of the ID's associated with the Geographic origin sheet.")

    def STD47(self):
        return "The 'country' column is a mandatory field in the Strains sheet."

    def STD48(self):
        return "The 'country' column is empty or has missing values."

    def STD49(self):
        return (f"The 'qps' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 1, 2.")

    def STD50(self):
        return (f"The 'axenicCulture' for strain with accessionNumber {self.pk} is not according to specification."
                f" Your value is {self.value} and the accepted values are 'Axenic', 'Not axenic'.")

    def STD51(self):
        return f"The 'mirriAccessionNumber' must be unique. The '{self.pk}' is repeated."

    def STD52(self):
        return (f"The 'mirriAccessionNumber' for strain with accessionNumber {self.pk} is incorrect."
                " It must have the expression MIRRI followed by 7 digits")

    def STD53(self):
        return (f"The 'siteLinks' for strain with accessionNumber {self.pk} is incorrect."
                " The displayed expression should be composed of: site name ';' website url.")

    def STD56(self):
        return (f"The 'siteLinks' for strain with accessionNumber {self.pk} is incorrect."
                " The url must be valid.")

    def STD57(self):
        # Bug fix: fragments joined without spaces ("countrycodes",
        # "mustfollow", "canbe"); mis-encoded apostrophe normalized.
        return (f"The 'country' for strain with accessionNumber {self.pk} is incorrect."
                " This information must be expressed by using the ISO-3166 standard for"
                " country codes. The preferred set is ISO 3166-1 alpha-2 (two letters code),"
                " but ISO 3166-1 alpha-3 (three letters code) is also accepted. Former"
                " country codes must follow standard's part three ISO 3166-3 (four letters"
                " code). Only one code can be included.")

    def STD58(self):
        return (f"The 'mtaFile' for strain with accessionNumber {self.pk} is incorrect."
                " The url must be valid.")

    def STD59(self):
        return (f"The 'absFile' for strain with accessionNumber {self.pk} is incorrect."
                " The displayed expression should be composed of: name ';' website url."
                " When only one URL is provided, the title may be omitted. In this case,"
                " the URL will be shown in clear to users.")

    def STD60(self):
        return (f"The 'absFile' for strain with accessionNumber {self.pk} is incorrect."
                " The url must be valid.")

    def STD61(self):
        return (f"The 'sequenceLiterature' for strain with accessionNumber {self.pk} is incorrect."
                " Numeric identifiers separated by a semicolon ';'.")

    def STD62(self):
        return (f"The 'plasmidCollections' for strain with accessionNumber {self.pk} is incorrect."
                " It should include the name of the plasmid followed by the CC number in"
                " parentheses. More than one plasmid can be reported, separated by ';'."
                " Plasmid names should be provided as free text. CC numbers should be"
                " composed by the CC acronym followed by a number separated by a space."
                " Numeric identifiers separated by a semicolon ';'.")

    def STD63(self):
        return (f"The 'otherCollectionNumbers' for strain with accessionNumber {self.pk} is incorrect."
                " The value must be of the format '<Sequence of characters> <sequence of characters>'.")

    def STD64(self):
        return (f"The 'type' for strain with accessionNumber {self.pk} is incorrect."
                f" Your value is {self.value} and the accepted values are 1, 2.")

    def STD65(self):
        return (f"The 'status' for strain with accessionNumber {self.pk} is incorrect."
                " The structure should be 'type of <character string>.")

    def STD68(self):
        return "The 'geographicOrigin' column is a mandatory field in the Strains Sheet. The column can not be empty."

    def STD69(self):
        return f"The 'geographicOrigin' for strain with accessionNumber {self.pk} is missing."

    # ------------------------------------------------------------------
    # Genomic Information Error Codes
    # ------------------------------------------------------------------

    def GID01(self):
        return "The 'Strain accessionNumber' (Strain AN) column is a mandatory field in the Genomic Information Sheet."

    def GID02(self):
        return "The 'Strain accessionNumber' (Strain AN) column is empty or has missing values."

    def GID03(self):
        return f"The value of 'Strain accessionNumber' (Strain AN) {self.value} is not in the Strains sheet."

    def GID04(self):
        return "The 'Marker' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."

    def GID05(self):
        return f"The 'Marker' for genomic information with Strain AN {self.pk} is missing."

    def GID06(self):
        return f"The value of 'Marker' {self.value} is not in the Markers sheet."

    def GID07(self):
        return "The 'INSDC AN' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."

    def GID08(self):
        return f"The 'INSDC AN' for genomic information with Strain AN {self.pk} is missing."

    def GID09(self):
        return f"The 'INSDC AN' for genomic information with Strain AN {self.pk} is incorrect."

    def GID10(self):
        return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect."
                " It must be a sequence of 'G', 'T', 'A', 'C' characters of any length and without white spaces.")

    def GID11(self):
        # NOTE(review): the text describes the INSDC accession-number format
        # although the field name says 'Sequence' — confirm the intent.
        return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect."
                " An INSDC accession number is an alphanumeric code made by a fixed number"
                " of letters followed by a fixed number of digits, without any separation."
                " For sequences, the code is currently made of two letters followed by six numbers.")

    # ------------------------------------------------------------------
    # Version Error Codes
    # ------------------------------------------------------------------

    def VRS01(self):
        return "The 'Version' column is a mandatory field in the Version Sheet."

    def VRS02(self):
        return "The 'Version' column is empty or has missing values."

    def VRS03(self):
        # NOTE(review): says 'Control Sheet' while the siblings say
        # 'Version Sheet' — confirm which sheet name is intended.
        return "The 'Date' column is a mandatory field in the Control Sheet."

    def VRS04(self):
        return "The 'Date' column is empty or has missing values."

    def VRS05(self):
        return f"The version {self.value} is the only one to be used."

    # ------------------------------------------------------------------
    # Ontobiotope Error Codes
    # ------------------------------------------------------------------

    def OTD01(self):
        return "The 'ID' column is a mandatory field in the Ontobiotope Sheet."

    def OTD02(self):
        return "The 'ID' column is empty or has missing values."

    def OTD03(self):
        # Bug fix: the ``def`` lines of OTD03/OTD04 were commented out while
        # their ``return`` statements were left behind (a syntax error).
        return "The 'Name' column is a mandatory field in the Ontobiotope Sheet. The column can not be empty."

    def OTD04(self):
        return f"The 'Name' for ontobiotope with ID {self.pk} is missing."
|
||||
|
||||
@ -4,27 +4,51 @@ from io import BytesIO
|
||||
from zipfile import BadZipfile
|
||||
from datetime import datetime
|
||||
from calendar import monthrange
|
||||
|
||||
import requests
|
||||
from openpyxl import load_workbook
|
||||
import pycountry
|
||||
|
||||
from mirri.io.parsers.excel import workbook_sheet_reader, get_all_cell_data_from_sheet
|
||||
from mirri.validation.error_logging import ErrorLog, Error
|
||||
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
|
||||
ERROR_CODE, FIELD, MANDATORY, MATCH,
|
||||
MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON,
|
||||
TYPE, UNIQUE, VALIDATION, VALUES, BIBLIO)
|
||||
MISSING, MULTIPLE, NAGOYA, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON,
|
||||
TYPE, UNIQUE, VALIDATION, VALUES, BIBLIO, DOMINIO,URL_DOMINIO, ISO, URL_TITLE,JUST_URL,TITLE,
|
||||
HISTORY,NAGOYA1, VERSION)
|
||||
from mirri.settings import LOCATIONS, SUBTAXAS
|
||||
from mirri.validation.validation_conf_20200601 import MIRRI_20200601_VALLIDATION_CONF
|
||||
from mirri.validation.validation_conf_12052023 import version_config
|
||||
|
||||
from mirri.validation.validation_conf_12052023 import MIRRI_12052023_VALLIDATION_CONF
|
||||
|
||||
|
||||
def validate_mirri_excel(fhand, version="20200601"):
|
||||
if version == "20200601":
|
||||
configuration = MIRRI_20200601_VALLIDATION_CONF
|
||||
def validate_mirri_excel(fhand, version= "5.1.2" ):
|
||||
if version == "5.1.2":
|
||||
configuration = MIRRI_12052023_VALLIDATION_CONF
|
||||
else:
|
||||
raise NotImplementedError("Only version20200601 is implemented")
|
||||
|
||||
raise NotImplementedError("Only version 5.1.2 is implemented")
|
||||
|
||||
return validate_excel(fhand, configuration)
|
||||
|
||||
def version(value , validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
for version in version_config:
|
||||
if value == version :
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def validate_country_code(value,validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
if pycountry.countries.get(alpha_2=value) or pycountry.countries.get(alpha_3=value) or pycountry.historic_countries.get(alpha_4 = value):
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def validate_excel(fhand, configuration):
|
||||
validation_conf = configuration['sheet_schema']
|
||||
@ -185,11 +209,14 @@ def validate_row(row, validation_steps, in_memory_sheets):
|
||||
kind = validation_step[TYPE]
|
||||
error_code = validation_step[ERROR_CODE]
|
||||
if kind == NAGOYA:
|
||||
if not is_valid_nagoya(row, in_memory_sheets):
|
||||
if not is_valid_nagoya_v12052023(row, in_memory_sheets):
|
||||
return error_code
|
||||
elif kind == BIBLIO:
|
||||
if not is_valid_pub(row):
|
||||
return error_code
|
||||
elif kind == NAGOYA1:
|
||||
if not is_valid_nago(row):
|
||||
return error_code
|
||||
else:
|
||||
msg = f'{kind} is not a recognized row validation type method'
|
||||
raise NotImplementedError(msg)
|
||||
@ -207,49 +234,70 @@ def validate_cell(value, validation_steps, crossrefs, shown_values, label):
|
||||
|
||||
if error_code is not None:
|
||||
return error_code
|
||||
|
||||
|
||||
|
||||
def is_valid_pub(row):
|
||||
pub_id = row.get('ID', None)
|
||||
pub_pmid = row.get('PMID', None)
|
||||
pub_doi = row.get('DOI', None)
|
||||
title = row.get('Title', None)
|
||||
full_reference = row.get('Full reference', None)
|
||||
authors = row.get('Authors', None)
|
||||
journal = row.get('Journal', None)
|
||||
year = row.get('Year', None)
|
||||
volumen = row.get('Volumen', None)
|
||||
volumen = row.get('Volume', None)
|
||||
first_page = row.get('First page', None)
|
||||
book_title = row.get('Book title', None)
|
||||
editors = row.get('Editors', None)
|
||||
publishers = row.get('Publishers', None)
|
||||
|
||||
if full_reference:
|
||||
if (pub_id != None and pub_doi != None) or (pub_id != None and pub_pmid != None) or (pub_id != None and full_reference != None) or (pub_id != None and authors != None and title != None and journal != None and year != None and volumen != None and first_page != None) :
|
||||
return True
|
||||
is_journal = bool(title)
|
||||
|
||||
if (is_journal and (not authors or not journal or not not year or
|
||||
not volumen or not first_page)):
|
||||
return False
|
||||
if (not is_journal and (not authors or not year or
|
||||
not editors or not publishers or not book_title)):
|
||||
return False
|
||||
# if (is_journal and (not authors or not journal or not not year or
|
||||
# not volumen or not first_page)):
|
||||
# return False
|
||||
#if (not is_journal and (not authors or not year or
|
||||
# not editors or not publishers or not book_title)):
|
||||
# return False
|
||||
|
||||
return False
|
||||
|
||||
def is_valid_nago(row):
|
||||
if not row:
|
||||
return True
|
||||
status = row.get("status", None)
|
||||
type = row.get("type", None)
|
||||
regex = r'^[a-zA-Z\s.\'-]+$'
|
||||
|
||||
if status != None and type != None:
|
||||
if (re.match(regex, status) and type==1):
|
||||
return False
|
||||
if (type == 2 and status is None):
|
||||
return False
|
||||
return True
|
||||
|
||||
def parsee_mirri_excel(row, in_memory_sheets, version=""):
|
||||
if version == "12052023":
|
||||
return is_valid_nagoya_v12052023 (row, in_memory_sheets)
|
||||
else:
|
||||
raise NotImplementedError("Only version is implemented")
|
||||
|
||||
def is_valid_nagoya(row, in_memory_sheets): # sourcery skip: return-identity
|
||||
location_index = row.get('Geographic origin', None)
|
||||
def is_valid_nagoya_v12052023(row, in_memory_sheets): # sourcery skip: return-identity
|
||||
location_index = row.get('geographicOrigin', None)
|
||||
if location_index is None:
|
||||
country = None
|
||||
else:
|
||||
geo_origin = in_memory_sheets[LOCATIONS].get(location_index, {})
|
||||
country = geo_origin.get('Country', None)
|
||||
|
||||
_date = row.get("Date of collection", None)
|
||||
_date = row.get("collectionDate", None)
|
||||
if _date is None:
|
||||
_date = row.get("Date of isolation", None)
|
||||
_date = row.get("isolationDate", None)
|
||||
if _date is None:
|
||||
_date = row.get("Date of deposit", None)
|
||||
_date = row.get("depositDate", None)
|
||||
if _date is None:
|
||||
_date = row.get("Date of inclusion in the catalogue", None)
|
||||
_date = row.get("accessionDate", None)
|
||||
if _date is not None:
|
||||
year = _date.year if isinstance(_date, datetime) else int(str(_date)[:4])
|
||||
else:
|
||||
@ -258,9 +306,9 @@ def is_valid_nagoya(row, in_memory_sheets): # sourcery skip: return-identity
|
||||
if year is not None and year >= 2014 and country is None:
|
||||
return False
|
||||
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def is_valid_regex(value, validation_conf):
|
||||
if value is None:
|
||||
return True
|
||||
@ -310,7 +358,9 @@ def is_valid_choices(value, validation_conf):
|
||||
values = [v.strip() for v in str(value).split(separator)]
|
||||
else:
|
||||
values = [str(value).strip()]
|
||||
|
||||
sorted_values = sorted(values)
|
||||
if sorted_values != values:
|
||||
return False
|
||||
return all(value in choices for value in values)
|
||||
|
||||
|
||||
@ -352,47 +402,145 @@ def is_valid_date(value, validation_conf):
|
||||
return True
|
||||
|
||||
|
||||
def is_valid_coords(value, validation_conf=None):
|
||||
# sourcery skip: return-identity
|
||||
def is_valid_dominio(value, validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
items = [i.strip() for i in value.split(";")]
|
||||
latitude = float(items[0])
|
||||
longitude = float(items[1])
|
||||
if len(items) > 2:
|
||||
precision = float(items[2])
|
||||
if latitude < -90 or latitude > 90:
|
||||
return False
|
||||
if longitude < -180 or longitude > 180:
|
||||
return False
|
||||
if len(items) >1:
|
||||
for i in range(0, len(items),2):
|
||||
nameSite = str(items[i])
|
||||
urlSite = str(items[i+1])
|
||||
dominio = urlSite.split(".")[-2]
|
||||
if nameSite.lower() != dominio:
|
||||
return False
|
||||
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
return False
|
||||
|
||||
def is_valid_title(value, validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
items = [i.strip() for i in value.split(";")]
|
||||
if len(items) >1:
|
||||
for i in range(0, len(items),2):
|
||||
nameSite = (items[i])
|
||||
urlSite = str(items[i+1])
|
||||
regex = r'^(http|https):\/\/[a-z0-9\-\.]+\.[a-z]{2,}([/a-z0-9\-\.]*)*$'
|
||||
if re.match(regex, nameSite) or isinstance(nameSite, int) or nameSite == '':
|
||||
return False
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
def is_valid_url_title(value, validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
items = [i.strip() for i in value.split(";")]
|
||||
if len(items) ==1:
|
||||
urlSite = str(items[0])
|
||||
response = requests.head(urlSite)
|
||||
if response.status_code != 200:
|
||||
return False
|
||||
|
||||
else:
|
||||
items = [i.strip() for i in value.split(";")]
|
||||
for i in range(0, len(items),2):
|
||||
nameSite = (items[i])
|
||||
urlSite = str(items[i+1])
|
||||
response = requests.head(urlSite)
|
||||
if response.status_code != 200:
|
||||
return False
|
||||
|
||||
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def is_valid_url_dominio(value, validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
items = [i.strip() for i in value.split(";")]
|
||||
for i in range(0, len(items),2):
|
||||
nameSite = str(items[i])
|
||||
urlSite = str(items[i+1])
|
||||
response = requests.head(urlSite)
|
||||
if response.status_code != 200:
|
||||
return False
|
||||
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def is_valid_just_url(value, validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
items = [i.strip() for i in value.split(";")]
|
||||
for i in items:
|
||||
nameSite = str(items[0])
|
||||
response = requests.head(i)
|
||||
if response.status_code != 200:
|
||||
return False
|
||||
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def is_valid_history(value, validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
items = [i.strip() for i in value.split("<")]
|
||||
for i in items:
|
||||
regex1 = r'^[a-zA-Z0-9 &,;.:''-]+,?\s*((19|20)\d{2})'
|
||||
regex2 = r'^[a-zA-Z0-9 &,;.:''-]+,?\s*[a-zA-Z0-9 &,;.''-] (19|20)\d{2}\s\([a-zA-Z0-9 &,;.''-:]+\)'
|
||||
regex3 = r'^[a-zA-Z0-9 &,;.:''-]+\,?\s*[a-zA-Z0-9 &,;.''-]'
|
||||
regex4 = r'^[a-zA-Z0-9 &,;.''-]+,?\s*(19|20)\d{2}\s\([a-zA-Z0-9 .''-,;&:]+\)'
|
||||
regex5 = r'^[a-zA-Z0-9 &,;.:''-]+,?\s*\([a-zA-Z0-9 &,;.''-:]+\) (19|20)\d{2}'
|
||||
if re.match(regex1, i):
|
||||
return True
|
||||
elif re.match(regex2, i):
|
||||
return True
|
||||
elif re.match(regex3, i):
|
||||
return True
|
||||
elif re.match(regex4, i):
|
||||
return True
|
||||
elif re.match(regex5, i):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def is_valid_coords(value, validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
|
||||
regex1 = r'^-?(90(\.0+)?|[1-8]?\d(\.\d+)?)(\s*;\s*-?(180(\.0+)?|((1[0-7]\d)|(\d{1,2}))(\.\d+)?))*$'
|
||||
regex2 = r'^-?(90(\.0+)?|[1-8]?\d(\.\d+)?)\s*;\s*-?(180(\.0+)?|((1[0-7]\d)|(\d{1,2}))(\.\d+)?)\s*;\s*(\d+\.\d+|\?)\s*;\s*(\d+\.\d+|\?)$|^(\d+\.\d+|\?)$|^\s*;\s*$'
|
||||
|
||||
if not re.match(regex1, value) and not re.match(regex2, value):
|
||||
return False
|
||||
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def is_valid_missing(value, validation_conf=None):
|
||||
return value is not None
|
||||
|
||||
|
||||
def is_valid_number(value, validation_conf):
|
||||
if value is None:
|
||||
return True
|
||||
try:
|
||||
value = float(value)
|
||||
except TypeError:
|
||||
return False
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
_max = validation_conf.get('max', None)
|
||||
_min = validation_conf.get('min', None)
|
||||
if (_max is not None and value > _max) or (_min is not None and value < _min):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def is_valid_taxon(value, validation_conf=None):
|
||||
multiple = validation_conf.get(MULTIPLE, False)
|
||||
separator = validation_conf.get(SEPARATOR, ';')
|
||||
@ -429,6 +577,8 @@ def _is_valid_taxon(value):
|
||||
|
||||
|
||||
def is_valid_unique(value, validation_conf):
|
||||
if not value:
|
||||
return True
|
||||
label = validation_conf['label']
|
||||
shown_values = validation_conf['shown_values']
|
||||
if label not in shown_values:
|
||||
@ -444,7 +594,6 @@ def is_valid_unique(value, validation_conf):
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def is_valid_file(path):
|
||||
try:
|
||||
with path.open("rb") as fhand:
|
||||
@ -464,8 +613,15 @@ VALIDATION_FUNCTIONS = {
|
||||
CROSSREF: is_valid_crossrefs,
|
||||
DATE: is_valid_date,
|
||||
COORDINATES: is_valid_coords,
|
||||
NUMBER: is_valid_number,
|
||||
TAXON: is_valid_taxon,
|
||||
TITLE: is_valid_title,
|
||||
DOMINIO: is_valid_dominio,
|
||||
URL_TITLE: is_valid_url_title,
|
||||
URL_DOMINIO: is_valid_url_dominio,
|
||||
JUST_URL: is_valid_just_url,
|
||||
ISO: validate_country_code,
|
||||
HISTORY: is_valid_history,
|
||||
VERSION: version,
|
||||
UNIQUE: is_valid_unique}
|
||||
|
||||
|
||||
@ -16,9 +16,20 @@ MATCH = 'match'
|
||||
VALUES = 'values'
|
||||
DATE = 'date'
|
||||
COORDINATES = 'coord'
|
||||
COORDINATES1 = 'coord1'
|
||||
NUMBER = 'number'
|
||||
TAXON = 'taxon'
|
||||
UNIQUE = 'unique'
|
||||
ROW_VALIDATION = 'row_validation'
|
||||
NAGOYA = 'nagoya'
|
||||
BIBLIO = 'bibliography'
|
||||
DOMINIO= 'is_valid_dominio'
|
||||
TITLE= 'is_valid_title'
|
||||
URL_DOMINIO = 'urll_valid_dominio'
|
||||
URL_TITLE= 'is_valid_url_title'
|
||||
ISO = 'validate_country_code'
|
||||
JUST_URL= 'is_valid_just_url'
|
||||
HISTORY= 'is_valid_history'
|
||||
MEU='is_valid_crossrefs_meu'
|
||||
NAGOYA1 = 'nayoga1'
|
||||
VERSION = 'version'
|
||||
@ -1,14 +1,20 @@
|
||||
#!/usr/bin/env python
|
||||
import pandas as pd
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from mirri.validation.excel_validator import validate_mirri_excel
|
||||
import warnings
|
||||
import warnings
|
||||
warnings.simplefilter("ignore")
|
||||
|
||||
from mirri.validation.excel_validator import validate_mirri_excel
|
||||
|
||||
def main():
|
||||
path = Path(sys.argv[1])
|
||||
error_log = validate_mirri_excel(path.open("rb"))
|
||||
version = str(sys.argv[2])
|
||||
try:
|
||||
|
||||
error_log = validate_mirri_excel(path.open("rb"), version=version)
|
||||
|
||||
except NotImplementedError as e:
|
||||
print(e)
|
||||
|
||||
for errors in error_log.get_errors().values():
|
||||
for error in errors:
|
||||
@ -16,4 +22,4 @@ def main():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
@ -1,10 +1,13 @@
|
||||
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
|
||||
ERROR_CODE, FIELD, MANDATORY, MATCH,
|
||||
MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
|
||||
UNIQUE,
|
||||
VALIDATION, VALUES, BIBLIO)
|
||||
MISSING, MULTIPLE, NAGOYA, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
|
||||
UNIQUE,VERSION,
|
||||
VALIDATION, VALUES, BIBLIO, DOMINIO, URL_DOMINIO,ISO, JUST_URL, URL_TITLE, TITLE, HISTORY,NAGOYA1)
|
||||
from mirri.settings import (ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
|
||||
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET, MARKERS)
|
||||
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET, MARKERS, CONTROL_SHEET)
|
||||
|
||||
|
||||
|
||||
# GEOGRAPHIC_ORIGIN
|
||||
# SEXUAL_STATE_SHEET,
|
||||
# RESOURCE_TYPES_VALUES,
|
||||
@ -12,9 +15,12 @@ from mirri.settings import (ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
|
||||
# PLOIDY_SHEET)
|
||||
|
||||
|
||||
|
||||
|
||||
STRAIN_FIELDS = [
|
||||
|
||||
{
|
||||
FIELD: "Accession number",
|
||||
FIELD: "accessionNumber",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: 'STD01'},
|
||||
{TYPE: UNIQUE, ERROR_CODE: 'STD03'},
|
||||
@ -23,16 +29,24 @@ STRAIN_FIELDS = [
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Restrictions on use",
|
||||
FIELD: "useRestrictions",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD05"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD06"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD06"},
|
||||
{TYPE: CHOICES, VALUES: ["1", "2", "3"],
|
||||
MULTIPLE: False, ERROR_CODE: "STD07"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "mirriAccessionNumber",
|
||||
VALIDATION: [
|
||||
{TYPE: UNIQUE, ERROR_CODE: 'STD51'},
|
||||
{TYPE: REGEXP, MATCH: "^MIRRI[0-9]{7}$", ERROR_CODE: "STD52"},
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
FIELD: "Nagoya protocol restrictions and compliance conditions",
|
||||
FIELD: "nagoyaConditions",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD08"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD09"},
|
||||
@ -41,29 +55,53 @@ STRAIN_FIELDS = [
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "ABS related files",
|
||||
VALIDATION: [],
|
||||
FIELD: "absFile",
|
||||
VALIDATION: [
|
||||
{TYPE: TITLE, ERROR_CODE: "STD59"},
|
||||
{TYPE: URL_TITLE, ERROR_CODE: "STD60",
|
||||
MULTIPLE: True, SEPARATOR: ";"},
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
FIELD: "siteLinks",
|
||||
VALIDATION: [
|
||||
{TYPE: DOMINIO, ERROR_CODE: "STD53",
|
||||
MULTIPLE: False, SEPARATOR: ";"},
|
||||
{TYPE: URL_DOMINIO, ERROR_CODE: "STD56",
|
||||
MULTIPLE: False, SEPARATOR: ";"},
|
||||
],
|
||||
},
|
||||
{
|
||||
FIELD: "MTA file",
|
||||
VALIDATION: [],
|
||||
FIELD: "mtaFile",
|
||||
VALIDATION: [
|
||||
{TYPE: JUST_URL, ERROR_CODE: "STD58",
|
||||
MULTIPLE: True, SEPARATOR: ";"},
|
||||
],
|
||||
},
|
||||
{
|
||||
FIELD: "Other culture collection numbers",
|
||||
# VALIDATION: [
|
||||
# {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD07",
|
||||
# MULTIPLE: True, SEPARATOR: ";"}
|
||||
# ]
|
||||
FIELD: "otherCollectionNumbers",
|
||||
VALIDATION: [
|
||||
{TYPE: REGEXP, MATCH: "([^ ]* [^ ]*)(; [^ ]* [^ ]*)*$", ERROR_CODE: "STD63",
|
||||
MULTIPLE: True, SEPARATOR: ';'},
|
||||
#{TYPE: CROSSREF, CROSSREF_NAME: "Strains", ERROR_CODE: "STD64"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Strain from a registered collection",
|
||||
FIELD: "registeredCollection",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD11"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "type",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD64"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Risk Group",
|
||||
FIELD: "riskGroup",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD12"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD13"},
|
||||
@ -72,33 +110,41 @@ STRAIN_FIELDS = [
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Dual use",
|
||||
FIELD: "dualUse",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD15"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Quarantine in Europe",
|
||||
FIELD: "euQuarantine",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD16"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "axenicCulture",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["Axenic", "Not axenic"],
|
||||
ERROR_CODE: "STD50"}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
FIELD: "Organism type",
|
||||
FIELD: "organismType",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD17"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD18"},
|
||||
{TYPE: CHOICES, VALUES: ["Algae", "Archaea", "Bacteria",
|
||||
"Cyanobacteria", "Filamentous Fungi",
|
||||
"Phage", "Plasmid", "Virus", "Yeast",
|
||||
"1", "2", "3", "4", "5", "6", "7", "8", "9"],
|
||||
"Cyanobacteria", "Filamentous Fungi", "Filamentous fungi",
|
||||
"Yeast", "Microalgae",
|
||||
"1", "2", "3", "4", "5", "6", "7"],
|
||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD19"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Taxon name",
|
||||
FIELD: "speciesName",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD20"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD21"},
|
||||
@ -107,73 +153,69 @@ STRAIN_FIELDS = [
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Infrasubspecific names",
|
||||
FIELD: "infrasubspecificNames",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Comment on taxonomy",
|
||||
FIELD: "taxonomyComments",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Interspecific hybrid",
|
||||
FIELD: "hybrid",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD23"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Status",
|
||||
FIELD: "status",
|
||||
VALIDATION: [
|
||||
{TYPE: REGEXP, MATCH: "^(type of|neotype of|holotype of |epitype of) ([a-zA-Z .'-]+)$", ERROR_CODE: "STD65"},
|
||||
|
||||
]
|
||||
|
||||
},
|
||||
{
|
||||
FIELD: "History of deposit",
|
||||
FIELD: "depositHistory",
|
||||
VALIDATION: [
|
||||
# {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD24", # modify the regex
|
||||
# MULTIPLE: True, SEPARATOR: ";"}
|
||||
{TYPE: HISTORY, ERROR_CODE: 'STD24'},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Depositor"
|
||||
FIELD: "depositor",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Date of deposit",
|
||||
FIELD: "depositDate",
|
||||
VALIDATION: [
|
||||
{TYPE: DATE, ERROR_CODE: "STD25"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Date of inclusion in the catalogue",
|
||||
FIELD: "accessionDate",
|
||||
VALIDATION: [
|
||||
{TYPE: DATE, ERROR_CODE: "STD26"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Collected by",
|
||||
FIELD: "collector",
|
||||
VALIDATION: []
|
||||
},
|
||||
|
||||
|
||||
{
|
||||
FIELD: "substrate",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Date of collection",
|
||||
VALIDATION: [
|
||||
{TYPE: DATE, ERROR_CODE: "STD27"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Isolated by",
|
||||
},
|
||||
{
|
||||
FIELD: "Date of isolation",
|
||||
VALIDATION: [
|
||||
{TYPE: DATE, ERROR_CODE: "STD28"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Substrate/host of isolation",
|
||||
},
|
||||
{
|
||||
FIELD: "Tested temperature growth range",
|
||||
FIELD: "temperatureGrowthRange",
|
||||
VALIDATION: [
|
||||
{TYPE: REGEXP, "match": r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
|
||||
ERROR_CODE: "STD29", MULTIPLE: True, SEPARATOR: ";"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Recommended growth temperature",
|
||||
FIELD: "recommendedTemperature",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD30"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD31"},
|
||||
@ -182,17 +224,9 @@ STRAIN_FIELDS = [
|
||||
MULTIPLE: True, SEPARATOR: ";"}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
FIELD: "Recommended medium for growth",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD33"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD34"},
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
|
||||
MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Form of supply",
|
||||
FIELD: "supplyForms",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD36"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD37"},
|
||||
@ -202,52 +236,70 @@ STRAIN_FIELDS = [
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Other denomination",
|
||||
FIELD: "otherDenomination",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Coordinates of geographic origin",
|
||||
FIELD: "geographicCoordinates",
|
||||
VALIDATION: [
|
||||
{TYPE: COORDINATES, ERROR_CODE: "STD39"},
|
||||
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Altitude of geographic origin",
|
||||
VALIDATION: [
|
||||
{TYPE: NUMBER, 'max': 8000, 'min': -200, ERROR_CODE: "STD40"},
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
# value can be in the cell or in another sheet. Don't configure this
|
||||
FIELD: "Geographic origin",
|
||||
FIELD: "geographicOrigin",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD68"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD69"},
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: "Geographic origin", ERROR_CODE: "STD46"},
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
FIELD: "isolationHabitat",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Isolation habitat",
|
||||
},
|
||||
{
|
||||
FIELD: "Ontobiotope term for the isolation habitat",
|
||||
FIELD: "ontobiotopeTerms",
|
||||
VALIDATION: [
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: "Ontobiotope",
|
||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD41"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "qps",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD49"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "GMO",
|
||||
FIELD: "gmo",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD42"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "GMO construction information",
|
||||
FIELD: "gmoConstruction",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Mutant information",
|
||||
FIELD: "mutant",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Genotype",
|
||||
FIELD: "genotype",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Sexual state",
|
||||
FIELD: "Plant pathogenicity code",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "sexualState",
|
||||
VALIDATION: [
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: SEXUAL_STATE_SHEET,
|
||||
ERROR_CODE: "STD43"}
|
||||
@ -258,46 +310,78 @@ STRAIN_FIELDS = [
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Ploidy",
|
||||
FIELD: "ploidy",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["0", "1", "2", "3", "4", "9"],
|
||||
{TYPE: CHOICES, VALUES: ["1", "2", "3", "4", "5", "9"],
|
||||
ERROR_CODE: "STD44"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Plasmids",
|
||||
FIELD: "plasmids",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Plasmids collections fields",
|
||||
FIELD: "plasmidCollections",
|
||||
VALIDATION: [
|
||||
{TYPE: REGEXP, MATCH: "([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\)(\s*;([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\))*$",
|
||||
ERROR_CODE: "STD62"}
|
||||
]
|
||||
},
|
||||
{
|
||||
# value can be in the cell or in another sheet. Don't configure this
|
||||
FIELD: "Literature",
|
||||
FIELD: "identificationLiterature",
|
||||
VALIDATION: [
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: LITERATURE_SHEET,
|
||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD45"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Plant pathogenicity code",
|
||||
FIELD: "pathogenicity",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Pathogenicity",
|
||||
FIELD: "enzymes",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Enzyme production",
|
||||
FIELD: "metabolites",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Production of metabolites",
|
||||
FIELD: "applications",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Applications",
|
||||
FIELD: "remarks",
|
||||
VALIDATION: []
|
||||
},
|
||||
{
|
||||
FIELD: "Remarks"
|
||||
FIELD: "sequenceLiterature",
|
||||
VALIDATION: [
|
||||
{TYPE: REGEXP, MATCH: "^\d+(\s*;?\s*\d+)*$", ERROR_CODE: "STD61"},
|
||||
]
|
||||
|
||||
},
|
||||
{
|
||||
FIELD: "Literature linked to the sequence/genome",
|
||||
|
||||
{
|
||||
FIELD: "recommendedMedium",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD33"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD34"},
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
|
||||
MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"}
|
||||
]
|
||||
},
|
||||
|
||||
|
||||
{
|
||||
FIELD: "country",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD54"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD55"},
|
||||
{TYPE: ISO, ERROR_CODE: "STD57"},
|
||||
#{TYPE: CROSSREF, CROSSREF_NAME: COUNTRY_CODES_SHEET, ERROR_CODE: "STD57"}
|
||||
]
|
||||
},
|
||||
]
|
||||
SHEETS_SCHEMA = {
|
||||
@ -317,7 +401,7 @@ SHEETS_SCHEMA = {
|
||||
FIELD: "Country",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "GOD03"},
|
||||
{TYPE: MISSING, ERROR_CODE: "GOD04"}
|
||||
{TYPE: MISSING, ERROR_CODE: "GOD04"},
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -389,6 +473,7 @@ SHEETS_SCHEMA = {
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "GID07"},
|
||||
{TYPE: MISSING, ERROR_CODE: "GID08"},
|
||||
{TYPE: REGEXP, MATCH: "^[A-Z]{2}[0-9]{6}$", ERROR_CODE: "GID11"},
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -399,11 +484,9 @@ SHEETS_SCHEMA = {
|
||||
},
|
||||
STRAINS: {
|
||||
"acronym": "STD",
|
||||
'id_field': 'Accession number',
|
||||
'id_field': 'accessionNumber',
|
||||
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
|
||||
ROW_VALIDATION: [
|
||||
{TYPE: NAGOYA, ERROR_CODE: "STD46"},
|
||||
],
|
||||
ROW_VALIDATION: [],
|
||||
COLUMNS: STRAIN_FIELDS,
|
||||
},
|
||||
LITERATURE_SHEET: {
|
||||
@ -412,7 +495,7 @@ SHEETS_SCHEMA = {
|
||||
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
|
||||
ROW_VALIDATION: [
|
||||
{TYPE: BIBLIO, ERROR_CODE: 'LID17'}
|
||||
],
|
||||
],
|
||||
COLUMNS: [
|
||||
{
|
||||
FIELD: "ID",
|
||||
@ -421,6 +504,18 @@ SHEETS_SCHEMA = {
|
||||
{TYPE: MISSING, ERROR_CODE: "LID02"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "PMID",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "LID18"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "DOI",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "LID20"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Full reference",
|
||||
VALIDATION: [
|
||||
@ -465,7 +560,6 @@ SHEETS_SCHEMA = {
|
||||
FIELD: "First page",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "LID15"},
|
||||
{TYPE: MISSING, ERROR_CODE: "LID16"},
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -504,13 +598,38 @@ SHEETS_SCHEMA = {
|
||||
},
|
||||
{
|
||||
FIELD: "Name",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "OTD03"},
|
||||
{TYPE: MISSING, ERROR_CODE: "OTD04"},
|
||||
]
|
||||
VALIDATION: []
|
||||
},
|
||||
]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
CONTROL_SHEET: {
|
||||
"acronym": "VRS",
|
||||
"id_field": "Version",
|
||||
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS09"},
|
||||
COLUMNS: [
|
||||
{
|
||||
FIELD: "Version",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "VRS01"},
|
||||
{TYPE: MISSING, ERROR_CODE: "VRS02"},
|
||||
{TYPE: VERSION, ERROR_CODE: "VRS05"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Date",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "VRS03"},
|
||||
{TYPE: MISSING, ERROR_CODE: "VRS04"},
|
||||
]
|
||||
},
|
||||
]
|
||||
},
|
||||
|
||||
MARKERS: {
|
||||
"acronym": "MKD",
|
||||
"id_field": "Acronym",
|
||||
@ -524,22 +643,31 @@ SHEETS_SCHEMA = {
|
||||
VALIDATION: []
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
CROSS_REF_CONF = {
|
||||
ONTOBIOTOPE: ['ID', 'Name'],
|
||||
LITERATURE_SHEET: ['ID'],
|
||||
LOCATIONS: ['Locality'],
|
||||
ONTOBIOTOPE: ['ID'],
|
||||
LITERATURE_SHEET: ['ID', 'DOI', 'PMID', 'Full reference', 'Authors', 'Title', 'Journal', 'Year', 'Volume', 'First page'],
|
||||
LOCATIONS: ['ID', 'Locality'],
|
||||
GROWTH_MEDIA: ['Acronym'],
|
||||
STRAINS: ["Accession number"],
|
||||
STRAINS: ["accessionNumber"],
|
||||
SEXUAL_STATE_SHEET: [],
|
||||
MARKERS: ["Acronym"],
|
||||
|
||||
}
|
||||
|
||||
MIRRI_20200601_VALLIDATION_CONF = {
|
||||
MIRRI_12052023_VALLIDATION_CONF = {
|
||||
'sheet_schema': SHEETS_SCHEMA,
|
||||
'cross_ref_conf': CROSS_REF_CONF,
|
||||
'keep_sheets_in_memory': [
|
||||
{'sheet_name': LOCATIONS, 'indexed_by': 'Locality'}]
|
||||
}
|
||||
|
||||
version_config = {
|
||||
'5.1.2': MIRRI_12052023_VALLIDATION_CONF,
|
||||
'date': '12/05/2023'
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user