First import

commit 332876f58c
19  README.md  Normal file
@@ -0,0 +1,19 @@
# MIRRI Utils

## Installation

> pip install path_to_package.tar.gz

## Description

A small set of utilities to deal with MIRRI data.

- A data class to handle strain data.
- An Excel reader for the MIRRI specification.
- An Excel validator for the MIRRI specification.
- An Excel writer to create an Excel file that follows the MIRRI specification.
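A minimal end-to-end sketch of these utilities, assembled from the APIs this commit introduces (`validate_mirri_excel`, `parse_mirri_excel`, see `bin/validate.py` below); the input file name is a placeholder:

```python
# Validate a MIRRI Excel file and, if it is clean, parse it;
# 'strains.xlsx' is a hypothetical input file.
from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.validation.excel_validator import validate_mirri_excel

with open('strains.xlsx', 'rb') as fhand:
    error_log = validate_mirri_excel(fhand, version='20200601')
    errors = error_log.get_errors()
    if errors:
        for errors_by_type in errors.values():
            for error in errors_by_type:
                print(error.pk, error.message, error.code)
    else:
        fhand.seek(0)  # the validator consumes the handle, rewind before parsing
        parsed = parse_mirri_excel(fhand, version='20200601')
        strains = list(parsed['strains'])
        growth_media = list(parsed['growth_media'])
```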
77  bin/delete_duplicated_strain_by_number.py  Normal file
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import argparse
import sys

from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import STRAIN_WS

SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'


def get_cmd_args():
    desc = "Delete the duplicated strains for a given accession number in MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-a', '--accession_number', required=True,
                        help='Delete the duplicated items in the database for the given accession number')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')

    args = parser.parse_args()

    return {'accession_number': args.accession_number, 'user': args.ws_user,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret}


def write_errors_in_screen(errors, fhand=sys.stderr):
    for key, errors_by_type in errors.items():
        fhand.write(f'{key}\n')
        fhand.write('-' * len(key) + '\n')
        for error in errors_by_type:
            if error.pk:
                fhand.write(f'{error.pk}: ')
            fhand.write(f'{error.message} - {error.code}\n')
        fhand.write('\n')


def main():
    args = get_cmd_args()
    out_fhand = sys.stdout

    client = BiolomicsMirriClient(server_url=SERVER_URL, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'])
    query = {"Query": [{"Index": 0,
                        "FieldName": "Collection accession number",
                        "Operation": "TextExactMatch",
                        "Value": args['accession_number']}],
             "Expression": "Q0",
             "DisplayStart": 0,
             "DisplayLength": 10}

    result = client.search(STRAIN_WS, query=query)
    total = result["total"]
    if total == 0:
        out_fhand.write('Accession not in database\n')
        sys.exit(0)
    elif total == 1:
        out_fhand.write('Accession is not duplicated\n')
        sys.exit(0)

    print(f'Duplicates found: {total}. Removing duplicates')
    # keep the last record and delete the rest
    duplicated_ids = [record.record_id for record in result['records']]
    for duplicated_id in duplicated_ids[:-1]:
        client.delete_by_id(STRAIN_WS, duplicated_id)


if __name__ == '__main__':
    main()
91  bin/delete_mirri_data.py  Normal file
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
import argparse
import sys

from mirri.biolomics.pipelines.strain import retrieve_strain_by_accession_number
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS, STRAIN_WS
from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.validation.excel_validator import validate_mirri_excel

SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'


def get_cmd_args():
    desc = "Delete the strains and growth media listed in a MIRRI Excel file from MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', help='Validated Excel file',
                        type=argparse.FileType('rb'), required=True)
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given Excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('-f', '--force_update', required=False,
                        action='store_true',
                        help='Use it if you want to update the existing strains')

    args = parser.parse_args()

    return {'input_fhand': args.input, 'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret, 'update': args.force_update}


def write_errors_in_screen(errors, fhand=sys.stderr):
    for key, errors_by_type in errors.items():
        fhand.write(f'{key}\n')
        fhand.write('-' * len(key) + '\n')
        for error in errors_by_type:
            if error.pk:
                fhand.write(f'{error.pk}: ')
            fhand.write(f'{error.message} - {error.code}\n')
        fhand.write('\n')


def main():
    args = get_cmd_args()
    input_fhand = args['input_fhand']
    spec_version = args['version']
    out_fhand = sys.stderr
    error_log = validate_mirri_excel(input_fhand, version=spec_version)
    errors = error_log.get_errors()
    if errors:
        write_errors_in_screen(errors, out_fhand)
        sys.exit(1)

    input_fhand.seek(0)
    parsed_objects = parse_mirri_excel(input_fhand, version=spec_version)
    strains = list(parsed_objects['strains'])
    growth_media = list(parsed_objects['growth_media'])

    client = BiolomicsMirriClient(server_url=SERVER_URL, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'])
    for gm in growth_media:
        try:
            client.delete_by_name(GROWTH_MEDIUM_WS, gm.acronym)
        except ValueError as error:
            print(error)
            continue
        print(f'Growth medium {gm.acronym} deleted')

    for strain in strains:
        ws_strain = retrieve_strain_by_accession_number(client, strain.id.strain_id)
        if ws_strain is not None:
            client.delete_by_id(STRAIN_WS, ws_strain.record_id)
            print(f'Strain {strain.id.strain_id} deleted')
        else:
            print(f'Strain {strain.id.strain_id} not in database')


if __name__ == '__main__':
    main()
182  bin/upload_strains_to_mirri_is.py  Normal file
@@ -0,0 +1,182 @@
#!/usr/bin/env python3
import argparse
import sys
from collections import Counter

from mirri.biolomics.pipelines.growth_medium import get_or_create_or_update_growth_medium
from mirri.biolomics.pipelines.strain import get_or_create_or_update_strain
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.validation.excel_validator import validate_mirri_excel

TEST_SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
PROD_SERVER_URL = 'https://webservices.bio-aware.com/mirri'


def get_cmd_args():
    desc = "Upload strains to MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', help='Validated Excel file',
                        type=argparse.FileType('rb'), required=True)
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given Excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('--force_update', required=False,
                        action='store_true',
                        help='Use it if you want to update the existing strains')
    parser.add_argument('--verbose', action='store_true',
                        help='Use it if you want a verbose output')
    parser.add_argument('--prod', action='store_true',
                        help='Use production server')
    parser.add_argument('--dont_add_gm', action='store_false',
                        help="Don't add growth media", default=True)
    parser.add_argument('--dont_add_strains', action='store_false',
                        help="Don't add strains", default=True)
    parser.add_argument('--skip_first_num', type=int,
                        help='Skip the first X strains given to the tool')

    args = parser.parse_args()

    return {'input_fhand': args.input, 'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret, 'update': args.force_update,
            'verbose': args.verbose, 'use_production_server': args.prod,
            'add_gm': args.dont_add_gm, 'add_strains': args.dont_add_strains,
            'skip_first_num': args.skip_first_num}


def write_errors_in_screen(errors, fhand=sys.stderr):
    for key, errors_by_type in errors.items():
        fhand.write(f'{key}\n')
        fhand.write('-' * len(key) + '\n')
        for error in errors_by_type:
            if error.pk:
                fhand.write(f'{error.pk}: ')
            fhand.write(f'{error.message} - {error.code}\n')
        fhand.write('\n')


def create_or_upload_strains(client, strains, update=False, counter=None,
                             out_fhand=None, seek=None):
    for index, strain in enumerate(strains):
        if seek is not None and index < seek:
            continue
        # if strain.id.strain_id != 'CECT 5766':
        #     continue
        result = get_or_create_or_update_strain(client, strain, update=update)

        new_strain = result['record']
        created = result['created']
        updated = result.get('updated', False)
        if updated:
            result_state = 'updated'
        elif created:
            result_state = 'created'
        else:
            result_state = 'not modified'
        if counter is not None:
            counter[result_state] += 1
        if out_fhand is not None:
            out_fhand.write(f'{index}: Strain {new_strain.id.strain_id}: {result_state}\n')
        # break


def create_or_upload_growth_media(client, growth_media, update=False, counter=None,
                                  out_fhand=None):
    for gm in growth_media:
        result = get_or_create_or_update_growth_medium(client, gm, update)

        new_gm = result['record']
        created = result['created']
        updated = result.get('updated', False)
        if updated:
            result_state = 'updated'
        elif created:
            result_state = 'created'
        else:
            result_state = 'not modified'
        if counter is not None:
            counter[result_state] += 1
        if out_fhand is not None:
            out_fhand.write(f'Growth medium {new_gm.record_name}: {result_state}\n')


def main():
    args = get_cmd_args()
    input_fhand = args['input_fhand']
    spec_version = args['version']
    out_fhand = sys.stdout
    error_log = validate_mirri_excel(input_fhand, version=spec_version)
    errors = error_log.get_errors()
    skip_first_num = args['skip_first_num']
    if errors:
        write_errors_in_screen(errors, out_fhand)
        sys.exit(1)

    input_fhand.seek(0)
    parsed_objects = parse_mirri_excel(input_fhand, version=spec_version)
    strains = list(parsed_objects['strains'])
    growth_media = list(parsed_objects['growth_media'])

    server_url = PROD_SERVER_URL if args['use_production_server'] else TEST_SERVER_URL

    client = BiolomicsMirriClient(server_url=server_url, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'],
                                  verbose=args['verbose'])

    if args['add_gm']:
        client.start_transaction()
        counter = Counter()
        try:
            create_or_upload_growth_media(client, growth_media, update=args['update'],
                                          counter=counter, out_fhand=out_fhand)
        except (Exception, KeyboardInterrupt) as error:
            out_fhand.write('There were some errors in the Growth media upload\n')
            out_fhand.write(str(error) + '\n')
            out_fhand.write('Rolling back\n')
            client.rollback()
            raise
        client.finish_transaction()
        show_stats(counter, 'Growth Media', out_fhand)

    if args['add_strains']:
        client.start_transaction()
        counter = Counter()
        try:
            create_or_upload_strains(client, strains, update=args['update'],
                                     counter=counter,
                                     out_fhand=out_fhand, seek=skip_first_num)
        except (Exception, KeyboardInterrupt) as error:
            out_fhand.write('There were some errors in the Strain upload\n')
            out_fhand.write(str(error) + '\n')
            out_fhand.write('Rolling back\n')
            client.rollback()
            raise
        client.finish_transaction()
        show_stats(counter, 'Strains', out_fhand)


def show_stats(counter, kind, out_fhand):
    out_fhand.write(f'{kind}\n')
    line = '-' * len(kind)
    out_fhand.write(f"{line}\n")
    for kind2, value in counter.most_common(5):
        out_fhand.write(f'{kind2}: {value}\n')
    out_fhand.write('\n')


if __name__ == '__main__':
    main()
19  bin/validate.py  Normal file
@@ -0,0 +1,19 @@
#!/usr/bin/env python
import sys
import warnings
from pathlib import Path

from mirri.validation.excel_validator import validate_mirri_excel

warnings.simplefilter("ignore")


def main():
    path = Path(sys.argv[1])
    error_log = validate_mirri_excel(path.open("rb"))

    for errors in error_log.get_errors().values():
        for error in errors:
            print(error.pk, error.message, error.code)


if __name__ == "__main__":
    main()
BIN  docs/Error_Log_Style_Sheet.docx  Normal file
Binary file not shown.
BIN  docs/ICT-TaskForce_HowToCompileTheSheets_v20200601.pdf  Normal file
Binary file not shown.
BIN  docs/ICT-TaskForce_RecommendationsToCollections_v20200601.pdf  Normal file
Binary file not shown.
61  mirri/TODO.txt  Normal file
@@ -0,0 +1,61 @@
Ontobiotope term: just one field in the dataset, two fields in Biolomics
Altitude: a field on its own and inside Coordinates

Geographic origin: a field and an entry in another table

Ploidy: how is this field formatted? haploid/diploid or 1, 2, 3...

Best strategy:

My class has
- strain data
- geographic data
- literature
- sequences


"No" not a valid value for Strain from a registered collection, Allowed values: ?. no. yes
"yes" not a valid value for GMO, Allowed values: ?. No. Yes

Organism Type:
first letter uppercase in deposit
lower case in retrieve

Taxon name is a list in retrieve


null values:
'Comment on taxonomy' = '' could be null
'Coordinates of geographic origin': {Longitude, lati... 'NaN' could be null
'Date of inclusion in the catalogue' = '' could be null
'Enzyme production' = '' could be null
'Ploidy': '?' could be null

Deposit date

--------------------------------------------

- Assign seq to strain in strain serializers
- Fields in the ws that are not in our specification. What to do with them?
- Type description - IGNORE
- Associated documents - IGNORE
- Data provided by - IGNORE
- Orders - IGNORE
- MTA text - IGNORE
- Catalog URL -

- Publication RecordName assignation. How to do it?
- Sequence RecordName assignation. How to do it?
- Publications serializer improvement
------------------------------------------------------

Marker Name: which options are allowed in the WS and how do they map to the types in the specifications?

update: it should be done in the detail url.

interspecific_hybrid is set to "no" by default in the web service if no value is given.
Tested temperature growth range comes back as {'max': 0.0, 'min': 0.0} when added empty

Very slow: a normal search action takes
21  mirri/__init__.py  Normal file
@@ -0,0 +1,21 @@
import functools


def rgetattr(obj, attr, *args):

    def _getattr(obj, attr):
        return getattr(obj, attr, *args)

    return functools.reduce(_getattr, [obj] + attr.split('.'))


def rsetattr(obj, attr, val):
    pre, _, post = attr.rpartition('.')
    return setattr(rgetattr(obj, pre) if pre else obj, post, val)

# using wonder's beautiful simplification:
# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427


class ValidationError(Exception):
    pass
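`rgetattr` and `rsetattr` read and write dotted attribute paths on nested objects; the serializers below rely on them. A quick illustration (the `SimpleNamespace` objects stand in for the package's nested entities):

```python
# rgetattr/rsetattr from mirri/__init__.py above, on a hypothetical nested object.
from types import SimpleNamespace

from mirri import rgetattr, rsetattr

strain = SimpleNamespace(id=SimpleNamespace(strain_id='CECT 5766'))
print(rgetattr(strain, 'id.strain_id'))    # -> CECT 5766
rsetattr(strain, 'id.strain_id', 'CECT 1')
print(strain.id.strain_id)                 # -> CECT 1
```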
0  mirri/biolomics/__init__.py  Normal file
0  mirri/biolomics/pipelines/__init__.py  Normal file
44  mirri/biolomics/pipelines/growth_medium.py  Normal file
@@ -0,0 +1,44 @@
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS
from mirri.entities.growth_medium import GrowthMedium
from mirri.biolomics.serializers.growth_media import get_growth_medium_record_name


def get_or_create_or_update_growth_medium(client: BiolomicsMirriClient,
                                          growth_medium: GrowthMedium,
                                          update=False):
    response = get_or_create_growth_medium(client, growth_medium)

    new_gm = response['record']
    created = response['created']
    if created:
        return {'record': new_gm, 'created': created, 'updated': False}

    if not update:
        return {'record': new_gm, 'created': False, 'updated': False}

    # compare growth media
    if growth_medium.is_equal(new_gm, exclude_fields=['record_id', 'record_name', 'acronym']):
        records_are_different = False
    else:
        growth_medium.update(new_gm, include_fields=['record_id', 'record_name'])
        records_are_different = True

    if records_are_different:
        updated_gm = client.update(GROWTH_MEDIUM_WS, growth_medium)
        updated = True
    else:
        updated_gm = new_gm
        updated = False
    return {'record': updated_gm, 'created': False, 'updated': updated}


def get_or_create_growth_medium(client: BiolomicsMirriClient,
                                growth_medium: GrowthMedium):
    record_name = get_growth_medium_record_name(growth_medium)
    gm = client.retrieve_by_name(GROWTH_MEDIUM_WS, record_name)
    if gm is not None:
        return {'record': gm, 'created': False}

    new_gm = client.create(GROWTH_MEDIUM_WS, growth_medium)
    return {'record': new_gm, 'created': True}
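A sketch of how a caller consumes the `{'record', 'created', 'updated'}` dict returned above, mirroring `bin/upload_strains_to_mirri_is.py`; the `client` and `growth_medium` objects are assumed to exist already:

```python
# Consuming the result dict of get_or_create_or_update_growth_medium;
# `client` and `growth_medium` are assumed to be built elsewhere.
result = get_or_create_or_update_growth_medium(client, growth_medium, update=True)
if result['created']:
    state = 'created'
elif result['updated']:
    state = 'updated'
else:
    state = 'not modified'
print(f"Growth medium {result['record'].record_name}: {state}")
```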
122  mirri/biolomics/pipelines/strain.py  Normal file
@@ -0,0 +1,122 @@
from pprint import pprint

import deepdiff

from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS, SEQUENCE_WS, STRAIN_WS
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.publication import Publication


def retrieve_strain_by_accession_number(client, accession_number):
    query = {"Query": [{"Index": 0,
                        "FieldName": "Collection accession number",
                        "Operation": "TextExactMatch",
                        "Value": accession_number}],
             "Expression": "Q0",
             "DisplayStart": 0,
             "DisplayLength": 10}

    result = client.search(STRAIN_WS, query=query)
    total = result["total"]
    if total == 0:
        return None
    elif total == 1:
        return result["records"][0]
    else:
        msg = f"More than one entry for {accession_number} in the database"
        raise ValueError(msg)


def get_or_create_publication(client: BiolomicsMirriClient, pub: Publication):
    new_pub = client.retrieve_by_name(BIBLIOGRAPHY_WS, pub.title)

    if new_pub is not None:
        return {'record': new_pub, 'created': False}
    new_pub = client.create(BIBLIOGRAPHY_WS, pub)
    return {'record': new_pub, 'created': True}


def get_or_create_sequence(client: BiolomicsMirriClient, sequence: GenomicSequenceBiolomics):
    seq = client.retrieve_by_name(SEQUENCE_WS, sequence.marker_id)
    if seq is not None:
        return {'record': seq, 'created': False}

    new_seq = client.create(SEQUENCE_WS, sequence)
    return {'record': new_seq, 'created': True}


def get_or_create_or_update_strain(client: BiolomicsMirriClient,
                                   record: StrainMirri, update=False):
    response = get_or_create_strain(client, record)
    new_record = response['record']
    created = response['created']

    if created:
        return {'record': new_record, 'created': True, 'updated': False}

    if not update:
        return {'record': new_record, 'created': False, 'updated': False}

    if record.record_id is None:
        record.record_id = new_record.record_id
    if record.record_name is None:
        record.record_name = new_record.record_name
    if record.synonyms is None or record.synonyms == []:
        record.synonyms = new_record.synonyms

    # compare strains
    # we exclude the publication id as it is an internal reference of the publication and can change
    diffs = deepdiff.DeepDiff(new_record.dict(), record.dict(),
                              ignore_order=True, exclude_paths=None,
                              exclude_regex_paths=[r"root\[\'publications\'\]\[\d+\]\[\'id\'\]",
                                                   r"root\[\'publications\'\]\[\d+\]\[\'RecordId\'\]",
                                                   r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordId\'\]",
                                                   r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordName\'\]"])

    if diffs:
        pprint(diffs, width=200)
        # pprint('what I am sending')
        # pprint(record.dict())
        # pprint('what is in the db')
        # pprint(new_record.dict())

    records_are_different = bool(diffs)
    if records_are_different:
        updated_record = update_strain(client, record)
        updated = True
    else:
        updated_record = record
        updated = False
    return {'record': updated_record, 'created': False, 'updated': updated}


def get_or_create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    new_strain = retrieve_strain_by_accession_number(client, strain.id.strain_id)
    if new_strain is not None:
        return {'record': new_strain, 'created': False}

    new_strain = create_strain(client, strain)

    return {'record': new_strain, 'created': True}


def create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    for pub in strain.publications:
        get_or_create_publication(client, pub)
    for marker in strain.genetics.markers:
        get_or_create_sequence(client, marker)

    new_strain = client.create(STRAIN_WS, strain)
    return new_strain


def update_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    for pub in strain.publications:
        get_or_create_publication(client, pub)
    for marker in strain.genetics.markers:
        get_or_create_sequence(client, marker)

    new_strain = client.update(STRAIN_WS, strain)
    return new_strain
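The deepdiff comparison above is what decides whether an update is sent: volatile internal ids are excluded so that two records differing only in those ids count as equal. A self-contained sketch of the same pattern with toy data:

```python
# The exclude_regex_paths pattern used above, on made-up dicts.
import deepdiff

in_db = {'publications': [{'id': 1, 'title': 'Some paper'}], 'ploidy': 2}
incoming = {'publications': [{'id': 99, 'title': 'Some paper'}], 'ploidy': 2}

# the internal publication id is volatile, so it is excluded from the comparison
diffs = deepdiff.DeepDiff(in_db, incoming, ignore_order=True,
                          exclude_regex_paths=[r"root\['publications'\]\[\d+\]\['id'\]"])
print(bool(diffs))  # -> False: the records count as equal, no update is sent
```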
0  mirri/biolomics/remote/__init__.py  Normal file
210  mirri/biolomics/remote/biolomics_client.py  Normal file
@@ -0,0 +1,210 @@
from mirri.biolomics.remote.endoint_names import (SEQUENCE_WS, STRAIN_WS,
                                                  GROWTH_MEDIUM_WS, TAXONOMY_WS,
                                                  COUNTRY_WS, ONTOBIOTOPE_WS,
                                                  BIBLIOGRAPHY_WS)
from mirri.biolomics.remote.rest_client import BiolomicsClient
from mirri.biolomics.serializers.sequence import (
    serialize_to_biolomics as sequence_to_biolomics,
    serialize_from_biolomics as sequence_from_biolomics)
from mirri.biolomics.serializers.strain import (
    serialize_to_biolomics as strain_to_biolomics,
    serialize_from_biolomics as strain_from_biolomics)
from mirri.biolomics.serializers.growth_media import (
    serialize_to_biolomics as growth_medium_to_biolomics,
    serialize_from_biolomics as growth_medium_from_biolomics)
from mirri.biolomics.serializers.taxonomy import (
    serialize_from_biolomics as taxonomy_from_biolomics)
from mirri.biolomics.serializers.locality import (
    serialize_from_biolomics as country_from_biolomics)
from mirri.biolomics.serializers.ontobiotope import (
    serialize_from_biolomics as ontobiotope_from_biolomics)
from mirri.biolomics.serializers.bibliography import (
    serializer_from_biolomics as bibliography_from_biolomics,
    serializer_to_biolomics as bibliography_to_biolomics
)


class BiolomicsMirriClient:
    _conf = {
        SEQUENCE_WS: {
            'serializers': {'to': sequence_to_biolomics,
                            'from': sequence_from_biolomics},
            'endpoint': 'WS Sequences'},
        STRAIN_WS: {
            'serializers': {'to': strain_to_biolomics,
                            'from': strain_from_biolomics},
            'endpoint': 'WS Strains'},
        GROWTH_MEDIUM_WS: {
            'serializers': {'from': growth_medium_from_biolomics,
                            'to': growth_medium_to_biolomics},
            'endpoint': 'WS Growth media'},
        TAXONOMY_WS: {
            'serializers': {'from': taxonomy_from_biolomics},
            'endpoint': 'WS Taxonomy'},
        COUNTRY_WS: {
            'serializers': {'from': country_from_biolomics},
            'endpoint': 'WS Locality'},
        ONTOBIOTOPE_WS: {
            'serializers': {'from': ontobiotope_from_biolomics},
            'endpoint': 'WS Ontobiotope'},
        BIBLIOGRAPHY_WS: {
            'serializers': {'from': bibliography_from_biolomics,
                            'to': bibliography_to_biolomics},
            'endpoint': 'WS Bibliography'
        }
    }

    def __init__(self, server_url, api_version, client_id, client_secret, username,
                 password, website_id=1, verbose=False):
        _client = BiolomicsClient(server_url, api_version, client_id,
                                  client_secret, username, password,
                                  website_id=website_id, verbose=verbose)

        self.client = _client
        self.schemas = self.client.get_schemas()
        self.allowed_fields = self.client.allowed_fields
        self._transaction_created_ids = None
        self._in_transaction = False
        self._verbose = verbose

    def _initialize_transaction_storage(self):
        if self._in_transaction:
            msg = 'Can not initialize a transaction if already in a transaction'
            raise RuntimeError(msg)
        self._transaction_created_ids = []

    def _add_created_to_transaction_storage(self, response, entity_name):
        if not self._in_transaction:
            msg = 'Can not add ids to the transaction storage if not in a transaction'
            raise RuntimeError(msg)

        id_ = response.json().get('RecordId', None)
        if id_ is not None:
            ws_endpoint_name = self._conf[entity_name]['endpoint']
            self._transaction_created_ids.insert(0, (ws_endpoint_name, id_))

    def start_transaction(self):
        self._initialize_transaction_storage()
        self._in_transaction = True

    def finish_transaction(self):
        self._in_transaction = False
        self._transaction_created_ids = None

    def get_endpoint(self, entity_name):
        return self._conf[entity_name]['endpoint']

    def get_serializers_to(self, entity_name):
        return self._conf[entity_name]['serializers']['to']

    def get_serializers_from(self, entity_name):
        return self._conf[entity_name]['serializers']['from']

    def retrieve_by_name(self, entity_name, name):
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.find_by_name(endpoint, name=name)
        if response.status_code == 404:
            return None
        elif response.status_code != 200:
            raise ValueError(f"{response.status_code}: {response.text}")

        ws_entity = response.json()

        return None if ws_entity is None else serializer_from(ws_entity,
                                                              client=self)

    def retrieve_by_id(self, entity_name, _id):
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.retrieve(endpoint, record_id=_id)
        if response.status_code == 404:
            return None
        elif response.status_code != 200:
            raise ValueError(f"{response.status_code}: {response.text}")

        ws_entity = response.json()

        return serializer_from(ws_entity, client=self)

    def create(self, entity_name, entity):
        endpoint = self.get_endpoint(entity_name)
        serializer_to = self.get_serializers_to(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        data = serializer_to(entity, client=self)
        response = self.client.create(endpoint, data=data)
        if response.status_code == 200:
            if self._in_transaction:
                self._add_created_to_transaction_storage(response, entity_name)
            return serializer_from(response.json(), client=self)
        else:
            msg = f"return_code: {response.status_code}. msg: {response.json()['errors']['Value']}"
            raise RuntimeError(msg)

    def delete_by_id(self, entity_name, record_id):
        endpoint = self.get_endpoint(entity_name)
        response = self.client.delete(endpoint, record_id=record_id)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)

    def delete_by_name(self, entity_name, record_name):
        endpoint = self.get_endpoint(entity_name)
        response = self.client.find_by_name(endpoint, record_name)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)
        try:
            record_id = response.json()['RecordId']
        except TypeError:
            raise ValueError(f'The given record_name {record_name} does not exist')
        self.delete_by_id(entity_name, record_id=record_id)

    def search(self, entity_name, query):
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.search(endpoint, search_query=query)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)
        search_result = response.json()
        result = {'total': search_result['TotalCount'],
                  'records': [serializer_from(record, client=self)
                              for record in search_result['Records']]}
        return result

    def update(self, entity_name, entity):
        record_id = entity.record_id
        if record_id is None:
            msg = 'In order to update the record, the entity needs its record_id'
            raise ValueError(msg)
        endpoint = self.get_endpoint(entity_name)
        serializer_to = self.get_serializers_to(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        data = serializer_to(entity, client=self, update=True)
        response = self.client.update(endpoint, record_id=record_id, data=data)
        if response.status_code == 200:
            entity = serializer_from(response.json(), client=self)
            return entity
        else:
            msg = f"return_code: {response.status_code}. msg: {response.text}"
            raise RuntimeError(msg)

    def rollback(self):
        self._in_transaction = False
        self.client.rollback(self._transaction_created_ids)
        self._transaction_created_ids = None
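The transaction support is a best-effort compensation scheme: the ids of records created while a transaction is open are remembered, and `rollback` deletes them in reverse creation order. A usage sketch (credentials and the growth medium object are placeholders):

```python
# The start_transaction / rollback / finish_transaction pattern of the
# client above; credentials and `some_growth_medium` are placeholders.
client = BiolomicsMirriClient(server_url='https://webservices.bio-aware.com/mirri_test',
                              api_version='v2', client_id='...', client_secret='...',
                              username='...', password='...')
client.start_transaction()
try:
    client.create(GROWTH_MEDIUM_WS, some_growth_medium)
except Exception:
    client.rollback()  # deletes every record created since start_transaction
    raise
client.finish_transaction()
```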
7  mirri/biolomics/remote/endoint_names.py  Normal file
@@ -0,0 +1,7 @@
SEQUENCE_WS = 'sequence'
STRAIN_WS = 'strain'
GROWTH_MEDIUM_WS = 'growth_medium'
TAXONOMY_WS = 'taxonomy'
COUNTRY_WS = 'country'
ONTOBIOTOPE_WS = 'ontobiotope'
BIBLIOGRAPHY_WS = 'bibliography'
214  mirri/biolomics/remote/rest_client.py  Normal file
@@ -0,0 +1,214 @@
import time
import re
import sys

import requests
from requests_oauthlib import OAuth2Session
from oauthlib.oauth2 import LegacyApplicationClient
from oauthlib.oauth2.rfc6749.errors import InvalidGrantError

from mirri.entities.strain import ValidationError


class BiolomicsClient:
    schemas = None
    allowed_fields = None

    def __init__(self, server_url, api_version, client_id, client_secret,
                 username, password, website_id=1, verbose=False):
        self._client_id = client_id
        self._client_secret = client_secret
        self._username = username
        self._password = password
        self._client = None
        self.server_url = server_url
        self._api_version = api_version
        self._auth_url = self.server_url + "/connect/token"
        self.access_token = None
        self.website_id = website_id
        self._verbose = verbose
        self._schema = self.get_schemas()

    def get_access_token(self):
        if self._client is None:
            self._client = LegacyApplicationClient(client_id=self._client_id)
            authenticated = False
        else:
            expires_at = self._client.token["expires_at"]
            authenticated = expires_at > time.time()
        if not authenticated:
            oauth = OAuth2Session(client=self._client)
            try:
                token = oauth.fetch_token(
                    token_url=self._auth_url,
                    username=self._username,
                    password=self._password,
                    client_id=self._client_id,
                    client_secret=self._client_secret,
                )
            except InvalidGrantError:
                oauth.close()
                raise
            self.access_token = token["access_token"]
            oauth.close()
        return self.access_token

    def _build_headers(self):
        self.get_access_token()
        return {
            "accept": "application/json",
            "websiteId": str(self.website_id),
            "Authorization": f"Bearer {self.access_token}",
        }

    def get_detail_url(self, end_point, record_id, api_version=None):
        # api_version = self._api_version if api_version is None else api_version
        if api_version:
            return "/".join([self.server_url, api_version, 'data',
                             end_point, str(record_id)])
        else:
            return "/".join([self.server_url, 'data', end_point, str(record_id)])

    def get_list_url(self, end_point):
        return "/".join([self.server_url, 'data', end_point])
        # return "/".join([self.server_url, self._api_version, 'data', end_point])

    def get_search_url(self, end_point):
        return "/".join([self.server_url, self._api_version, 'search', end_point])

    def get_find_by_name_url(self, end_point):
        return "/".join([self.get_search_url(end_point), 'findByName'])

    def search(self, end_point, search_query):
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_search_url(end_point)
        time0 = time.time()
        response = requests.post(url, json=search_query, headers=header)
        time1 = time.time()
        if self._verbose:
            sys.stdout.write(f'Search to {end_point} request time for {url}: {time1 - time0}\n')
        return response

    def retrieve(self, end_point, record_id):
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id, api_version=self._api_version)
        time0 = time.time()
        response = requests.get(url, headers=header)
        time1 = time.time()
        if self._verbose:
            sys.stdout.write(f'Get to {end_point} request time for {url}: {time1 - time0}\n')
        return response

    def create(self, end_point, data):
        self._check_end_point_exists(end_point)
        self._check_data_consistency(data, self.allowed_fields[end_point])
        header = self._build_headers()
        url = self.get_list_url(end_point)
        return requests.post(url, json=data, headers=header)

    def update(self, end_point, record_id, data):
        self._check_end_point_exists(end_point)
        self._check_data_consistency(data, self.allowed_fields[end_point],
                                     update=True)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id=record_id)
        return requests.put(url, json=data, headers=header)

    def delete(self, end_point, record_id):
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id)
        return requests.delete(url, headers=header)

    def find_by_name(self, end_point, name):
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_find_by_name_url(end_point)
        response = requests.get(url, headers=header, params={'name': name})
        return response

    def get_schemas(self):
        if self.schemas is None:
            headers = self._build_headers()
            url = self.server_url + '/schemas'
            response = requests.get(url, headers=headers)
            if response.status_code == 200:
                self.schemas = response.json()
            else:
                raise ValueError(f"{response.status_code}: {response.text}")
        if self.allowed_fields is None:
            self.allowed_fields = self._process_schema(self.schemas)
        return self.schemas

    @staticmethod
    def _process_schema(schemas):
        schema = schemas[0]
        allowed_fields = {}
        for endpoint_schema in schema['TableViews']:
            endpoint_name = endpoint_schema['TableViewName']
            endpoint_values = endpoint_schema['ResultFields']
            fields = {field['title']: field for field in endpoint_values}
            allowed_fields[endpoint_name] = fields
        return allowed_fields

    def _check_end_point_exists(self, endpoint):
        if endpoint not in self.allowed_fields.keys():
            raise ValueError(f'{endpoint} not a recognised endpoint')

    def _check_data_consistency(self, data, allowed_fields, update=False):
        update_mandatory = set(['RecordDetails', 'RecordName', 'RecordId'])
        if update and not update_mandatory.issubset(data.keys()):
            msg = 'Updating data keys must be RecordDetails, RecordName and RecordId'
            raise ValidationError(msg)

        if not update and set(data.keys()).difference(['RecordDetails', 'RecordName', 'Acronym']):
            msg = 'data keys must be RecordDetails and RecordName or Acronym'
            raise ValidationError(msg)
        for field_name, field_value in data['RecordDetails'].items():
            if field_name not in allowed_fields:
                raise ValidationError(f'{field_name} not in allowed fields')

            field_schema = allowed_fields[field_name]
            self._check_field_schema(field_name, field_schema, field_value)

    @staticmethod
    def _check_field_schema(field_name, field_schema, field_value):
        if field_schema['FieldType'] != field_value['FieldType']:
            msg = f"Bad FieldType ({field_value['FieldType']}) for {field_name}. "
            msg += f"It should be {field_schema['FieldType']}"
            raise ValidationError(msg)

        states = field_schema.get('states', None)
        if states:
            states = [re.sub(r" *\(.*\)", "", s) for s in states]

        subfields = field_schema.get('subfields', None)
        if subfields is not None and states is not None:
            subfield_names = [subfield['SubFieldName']
                              for subfield in subfields if subfield['IsUsed']]

            for val in field_value['Value']:
                if val['Name'] not in subfield_names:
                    msg = f"{field_name}: {val['Name']} not in {subfield_names}"
                    raise ValidationError(msg)

                if val['Value'] not in states:
                    msg = f"{field_value['Value']} not a valid value for "
                    msg += f"{field_name}, Allowed values: {'. '.join(states)}"
                    raise ValidationError(msg)

        elif states is not None:
            if field_value['Value'] not in states:
                msg = f"{field_value['Value']} not a valid value for "
                msg += f"{field_name}, Allowed values: {'. '.join(states)}"
                raise ValidationError(msg)

    def rollback(self, created_ids):
        for endpoint, id_ in created_ids:
            try:
                self.delete(end_point=endpoint, record_id=id_)
            except Exception:
                pass
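The consistency checks above encode the expected payload shape: a creation payload carries only `RecordName` (or `Acronym`) plus `RecordDetails`, and every detail field must exist in the endpoint schema with a matching `FieldType`. A minimal sketch of a payload that would pass them (the field name, value and type are made up and must match the real Biolomics schema):

```python
# Minimal shape of a creation payload accepted by _check_data_consistency;
# 'Ploidy' and its FieldType are hypothetical and schema-dependent.
data = {
    'RecordName': 'CECT 1',
    'RecordDetails': {
        'Ploidy': {'Value': 'haploid', 'FieldType': 'E'},
    },
}
```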
3  mirri/biolomics/serializers/__init__.py  Normal file
@@ -0,0 +1,3 @@
RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'
RECORD_DETAILS = 'RecordDetails'
82  mirri/biolomics/serializers/bibliography.py  Normal file
@@ -0,0 +1,82 @@
from typing import List

from mirri import rgetattr
from mirri.entities.publication import Publication
from mirri.biolomics.settings import PUB_MIRRI_FIELDS

RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'

PUB_MAPPING = {
    # 'record_id': 'RecordId',
    # 'record_name': 'RecordName',
    'strains': "Associated strains",
    'taxa': "Associated taxa",
    'authors': "Authors",
    # 'sequences': "Associated sequences",
    # 'abstract': "Abstract",
    # 'collection': "Collection",
    'doi': "DOI number",
    'editor': "Editor(s)",
    # 'full_reference': "Full reference",
    # 'link': "Hyperlink",
    'isbn': "ISBN",
    'issn': "ISSN",
    'issue': "Issue",
    'journal': "Journal",
    'journal_book': "Journal-Book",
    # 'keywords': "Keywords",
    'first_page': "Page from",
    'last_page': "Page to",
    'publisher': "Publisher",
    'pubmed_id': "PubMed ID",
    'volume': "Volume",
    'year': "Year",
}
REV_PUB_MAPPING = {v: k for k, v in PUB_MAPPING.items()}


def serializer_from_biolomics(ws_data, client=None) -> Publication:
    pub = Publication()

    pub.record_id = ws_data[RECORD_ID]
    pub.record_name = ws_data[RECORD_NAME]
    pub.title = ws_data[RECORD_NAME]
    for field, value in ws_data['RecordDetails'].items():
        value = value['Value']
        if not value:
            continue
        attr = REV_PUB_MAPPING.get(field, None)
        if not attr:
            continue
        if attr in ('year', 'first_page', 'last_page'):
            value = int(value)
        setattr(pub, attr, value)
    return pub


def get_publication_record_name(publication):
    if publication.record_name:
        return publication.record_name
    if publication.title:
        return publication.title
    if publication.pubmed_id:
        return f'PUBMED:{publication.pubmed_id}'
    if publication.doi:
        return f'DOI:{publication.doi}'


def serializer_to_biolomics(publication: Publication, client=None, update=False):
    ws_data = {}
    if publication.record_id:
        ws_data[RECORD_ID] = publication.record_id
    ws_data[RECORD_NAME] = get_publication_record_name(publication)
    details = {}
    for attr, field in PUB_MAPPING.items():
        value = getattr(publication, attr, None)
        if value is None:
            continue
        field_type = 'D' if attr == 'year' else "E"
        details[field] = {'Value': value, 'FieldType': field_type}
    ws_data['RecordDetails'] = details
    return ws_data
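For reference, the serializer turns a `Publication` into the flat Biolomics payload shape driven by `PUB_MAPPING`; a sketch, assuming `Publication` exposes plain attributes as the deserializer above does (the values are made up):

```python
# Sketch of the payload serializer_to_biolomics produces; values are made up.
pub = Publication()
pub.title = 'A study of strains'
pub.year = 2020
ws_data = serializer_to_biolomics(pub)
# ws_data == {'RecordName': 'A study of strains',
#             'RecordDetails': {'Year': {'Value': 2020, 'FieldType': 'D'}}}
```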
66  mirri/biolomics/serializers/growth_media.py  Normal file
@@ -0,0 +1,66 @@
from mirri.biolomics.serializers import RECORD_ID, RECORD_NAME, RECORD_DETAILS
from mirri.entities.growth_medium import GrowthMedium


def serialize_from_biolomics(ws_data, client=None) -> GrowthMedium:
    medium = GrowthMedium()
    medium.record_name = ws_data.get('RecordName', None)
    medium.description = get_growth_medium_record_name(medium)
    medium.record_id = ws_data.get('RecordId', None)
    for key, value in ws_data['RecordDetails'].items():
        value = value['Value']
        if not value:
            continue

        if key == "Full description":
            medium.full_description = value
        if key == "Ingredients":
            medium.ingredients = value
        if key == 'Medium description':
            medium.description = value
        if key == 'Other name':
            medium.other_name = value
        if key == 'pH':
            medium.ph = value
        if key == 'Sterilization conditions':
            medium.sterilization_conditions = value
    return medium


def get_growth_medium_record_name(growth_medium):
    if growth_medium.record_name:
        return growth_medium.record_name
    if growth_medium.description:
        return growth_medium.description
    if growth_medium.acronym:
        return growth_medium.acronym


GROWTH_MEDIUM_MAPPING = {
    'acronym': 'Acronym',
    'full_description': "Full description",
    'ingredients': "Ingredients",
    'description': 'Medium description',
    'other_name': 'Other name',
    'ph': 'pH',
    'sterilization_conditions': 'Sterilization conditions'
}


def serialize_to_biolomics(growth_medium: GrowthMedium, client=None, update=False):
    ws_data = {}
    if growth_medium.record_id:
        ws_data[RECORD_ID] = growth_medium.record_id
    record_name = get_growth_medium_record_name(growth_medium)
    ws_data[RECORD_NAME] = record_name
    details = {}
    for field in growth_medium.fields:
        if field in ('acronym', 'record_id', 'record_name'):
            continue
        value = getattr(growth_medium, field, None)
        if value is not None:
            details[GROWTH_MEDIUM_MAPPING[field]] = {'Value': value, 'FieldType': 'E'}

    ws_data[RECORD_DETAILS] = details
    return ws_data
26  mirri/biolomics/serializers/locality.py  Normal file
@@ -0,0 +1,26 @@
from mirri.entities.location import Location


def serialize_from_biolomics(ws_data, client=None):
    return ws_data


# this is a proof of concept
def serialize_location(location: Location):
    fields = {}
    if location.country:
        fields['Country'] = {'Value': location.country, 'FieldType': 'E'}
    if location.latitude and location.longitude:
        value = {'Latitude': location.latitude,
                 'Longitude': location.longitude}
        if location.coord_uncertainty:
            value['Precision'] = location.coord_uncertainty
        fields['GIS position'] = {'FieldType': 'L', 'Value': value}

    fields['Strains'] = {"FieldType": "RLink", 'Value': [{
        'Name': {'Value': None, 'FieldType': "E"},
        'RecordId': None
    }]}

    return {"RecordDetails": fields,
            "RecordName": location.country}
2  mirri/biolomics/serializers/ontobiotope.py  Normal file
@@ -0,0 +1,2 @@
def serialize_from_biolomics(ws_data, client=None):
    return ws_data
81  mirri/biolomics/serializers/sequence.py  Normal file
@@ -0,0 +1,81 @@
from mirri.entities.sequence import GenomicSequence
from mirri.biolomics.serializers import RECORD_ID, RECORD_NAME, RECORD_DETAILS


class GenomicSequenceBiolomics(GenomicSequence):
    def __init__(self, **kwargs):
        super().__init__(freeze=False, **kwargs)

    @property
    def record_id(self) -> int:
        return self._data.get(RECORD_ID, None)

    @record_id.setter
    def record_id(self, value: int):
        self._data[RECORD_ID] = value

    @property
    def record_name(self) -> str:
        return self._data.get(RECORD_NAME, None)

    @record_name.setter
    def record_name(self, value: str):
        self._data[RECORD_NAME] = value

    def dict(self):
        _data = super(GenomicSequenceBiolomics, self).dict()
        if self.record_id:
            _data[RECORD_ID] = self.record_id
        if self.record_name:
            _data[RECORD_NAME] = self.record_name
        return _data


def serialize_to_biolomics(marker: GenomicSequenceBiolomics, client=None, update=False):
    ws_sequence = {}
    if marker.record_id:
        ws_sequence[RECORD_ID] = marker.record_id
    if marker.record_name:
        ws_sequence[RECORD_NAME] = marker.record_name
    else:
        ws_sequence[RECORD_NAME] = marker.marker_id
    details = {}
    if marker.marker_id:
        details["INSDC number"] = {"Value": marker.marker_id,
                                   "FieldType": "E"}
    if marker.marker_seq:
        details["DNA sequence"] = {
            "Value": {"Sequence": marker.marker_seq},
            "FieldType": "N"}
    if marker.marker_type:
        details['Marker name'] = {"Value": marker.marker_type, "FieldType": "E"}

    ws_sequence[RECORD_DETAILS] = details

    return ws_sequence


MAPPING_WS_SPEC_TYPES = {
    'Beta tubulin': 'TUBB'
}


def serialize_from_biolomics(ws_data, client=None) -> GenomicSequenceBiolomics:
    marker = GenomicSequenceBiolomics()
    marker.record_id = ws_data[RECORD_ID]
    marker.record_name = ws_data[RECORD_NAME]

    for key, value in ws_data['RecordDetails'].items():
        value = value['Value']
        if key == 'INSDC number' and value:
            marker.marker_id = value
        elif key == 'Marker name' and value:
            kind = MAPPING_WS_SPEC_TYPES.get(value, None)
            value = kind if kind else value
            marker.marker_type = value

        elif key == 'DNA sequence' and 'Sequence' in value and value['Sequence']:
            marker.marker_seq = value['Sequence']

    return marker
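A minimal usage sketch for the marker serializer above. The accession number and sequence are made up, and 'ITS' is assumed to be one of the allowed marker types.

from mirri.biolomics.serializers.sequence import (GenomicSequenceBiolomics,
                                                  serialize_to_biolomics)

marker = GenomicSequenceBiolomics()
marker.marker_id = 'AB123456'  # hypothetical INSDC accession
marker.marker_type = 'ITS'     # assumed to be in ALLOWED_MARKER_TYPES
marker.marker_seq = 'ACGTACGT'
ws_sequence = serialize_to_biolomics(marker)
# With no record name set, RecordName falls back to the INSDC number
assert ws_sequence['RecordName'] == 'AB123456'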
462
mirri/biolomics/serializers/strain.py
Normal file
@ -0,0 +1,462 @@
import re
import sys
import pycountry

from mirri import rgetattr, rsetattr
from mirri.entities.date_range import DateRange
from mirri.entities.strain import ORG_TYPES, OrganismType, StrainId, StrainMirri, add_taxon_to_strain
from mirri.biolomics.remote.endoint_names import (GROWTH_MEDIUM_WS, TAXONOMY_WS,
                                                  ONTOBIOTOPE_WS, BIBLIOGRAPHY_WS, SEQUENCE_WS, COUNTRY_WS)
from mirri.settings import (
    ALLOWED_FORMS_OF_SUPPLY,
    NAGOYA_PROBABLY_SCOPE,
    NAGOYA_NO_RESTRICTIONS,
    NAGOYA_DOCS_AVAILABLE,
    NO_RESTRICTION,
    ONLY_RESEARCH,
    COMMERCIAL_USE_WITH_AGREEMENT,
)
from mirri.biolomics.settings import MIRRI_FIELDS
from mirri.utils import get_pycountry

NAGOYA_TRANSLATOR = {
    NAGOYA_NO_RESTRICTIONS: "no known restrictions under the Nagoya protocol",
    NAGOYA_DOCS_AVAILABLE: "documents providing proof of legal access and terms of use available at the collection",
    NAGOYA_PROBABLY_SCOPE: "strain probably in scope, please contact the culture collection",
}
REV_NAGOYA_TRANSLATOR = {v: k for k, v in NAGOYA_TRANSLATOR.items()}

RESTRICTION_USE_TRANSLATOR = {
    NO_RESTRICTION: "no restriction apply",
    ONLY_RESEARCH: "for research use only",
    COMMERCIAL_USE_WITH_AGREEMENT: "for commercial development a special agreement is requested",
}

REV_RESTRICTION_USE_TRANSLATOR = {v: k for k, v in RESTRICTION_USE_TRANSLATOR.items()}

DATE_TYPE_FIELDS = ("Date of collection", "Date of isolation",
                    "Date of deposit", "Date of inclusion in the catalogue")
BOOLEAN_TYPE_FIELDS = ("Strain from a registered collection", "Dual use",
                       "Quarantine in Europe", "Interspecific hybrid")  # , 'GMO')
FILE_TYPE_FIELDS = ("MTA file", "ABS related files")
MAX_MIN_TYPE_FIELDS = ("Tested temperature growth range",
                       "Recommended growth temperature")
LIST_TYPES_TO_JOIN = ('Other denomination', 'Plasmids collections fields', 'Plasmids')

MARKER_TYPE_MAPPING = {
    '16S rRNA': 'Sequences 16s',  # or Sequences c16S rRNA
    'ACT': 'Sequences ACT',
    'CaM': 'Sequences CaM',
    'EF-1α': 'Sequences TEF1a',
    'ITS': 'Sequences ITS',
    'LSU': 'Sequences LSU',
    'RPB1': 'Sequences RPB1',
    'RPB2': 'Sequences RPB2',
    'TUBB': 'Sequences TUB'  # or Sequences Beta tubulin
}


def serialize_to_biolomics(strain: StrainMirri, client=None, update=False,
                           log_fhand=None):  # sourcery no-metrics
    if log_fhand is None:
        log_fhand = sys.stdout
    strain_record_details = {}

    for field in MIRRI_FIELDS:
        try:
            biolomics_field = field["biolomics"]["field"]
            biolomics_type = field["biolomics"]["type"]
        except KeyError:
            # print(f'biolomics not configured: {field["label"]}')
            continue

        label = field["label"]
        attribute = field["attribute"]
        value = rgetattr(strain, attribute, None)
        if value is None:
            continue

        if label == "Accession number":
            value = f"{strain.id.collection} {strain.id.number}"
        if label == "Restrictions on use":
            value = RESTRICTION_USE_TRANSLATOR[value]
        elif label == "Nagoya protocol restrictions and compliance conditions":
            value = NAGOYA_TRANSLATOR[value]
        elif label in FILE_TYPE_FIELDS:
            value = [{"Name": "link", "Value": fname} for fname in value]
        elif label == "Other culture collection numbers":
            value = "; ".join(on.strain_id for on in value) if value else None
        elif label in BOOLEAN_TYPE_FIELDS:
            value = 'yes' if value else 'no'
        elif label == 'GMO':
            value = 'Yes' if value else 'No'
        elif label == "Organism type":
            org_types = [ot.name for ot in value]
            value = []
            for ot in ORG_TYPES.keys():
                is_organism = "yes" if ot in org_types else "no"
                value.append({"Name": ot, "Value": is_organism})
        elif label == 'Taxon name':
            if client:
                taxa = strain.taxonomy.long_name.split(';')
                value = []
                for taxon_name in taxa:
                    taxon = get_remote_rlink(client, TAXONOMY_WS, taxon_name)
                    if taxon:
                        value.append(taxon)
                if not value:
                    msg = f'WARNING: {strain.taxonomy.long_name} not found in database'
                    log_fhand.write(msg + '\n')
                    # TODO: decide to raise or not if taxon not in MIRRI DB
                    # raise ValueError(msg)

        elif label in DATE_TYPE_FIELDS:
            year = value._year
            month = value._month or 1
            day = value._day or 1
            if year is None:
                continue
            value = f"{year}-{month:02}-{day:02}"
        elif label == 'History of deposit':
            value = " < ".join(value)
        elif label in MAX_MIN_TYPE_FIELDS:
            if isinstance(value, (int, float, str)):
                _max, _min = float(value), float(value)
            else:
                _max, _min = float(value['max']), float(value['min'])

            content = {"MaxValue": _max, "MinValue": _min,
                       "FieldType": biolomics_type}
            strain_record_details[biolomics_field] = content
            continue
        elif label in LIST_TYPES_TO_JOIN:
            value = '; '.join(value)
        # TODO: Check how to deal with crossrefs
        elif label == "Recommended medium for growth":
            if client is not None:
                ref_value = []
                for medium in value:
                    ws_gm = client.retrieve_by_name(GROWTH_MEDIUM_WS, medium)
                    if ws_gm is None:
                        raise ValueError(
                            f'Can not find the growth medium: {medium}')
                    gm = {"Name": {"Value": medium, "FieldType": "E"},
                          "RecordId": ws_gm.record_id}
                    ref_value.append(gm)
                value = ref_value
            else:
                continue

        elif label == "Form of supply":
            _value = []
            for form in ALLOWED_FORMS_OF_SUPPLY:
                is_form = "yes" if form in value else "no"
                _value.append({"Name": form, "Value": is_form})
            value = _value
        # print(label, value), biolomics_field
        elif label == "Coordinates of geographic origin":
            value = {'Latitude': strain.collect.location.latitude,
                     'Longitude': strain.collect.location.longitude}
            precision = strain.collect.location.coord_uncertainty
            if precision is not None:
                value['Precision'] = precision
        elif label == "Geographic origin":
            if client is not None and value.country is not None:
                country = get_pycountry(value.country)
                if country is None:
                    log_fhand.write(f'WARNING: {value.country} not a valid country code/name\n')
                else:
                    _value = get_country_record(country, client)
                    if _value is None:  # TODO: Remove this once the countries are added to the DB
                        msg = f'WARNING: {value.country} not in MIRRI DB'
                        log_fhand.write(msg + '\n')
                        # raise ValueError(msg)
                    else:
                        content = {"Value": [_value], "FieldType": "RLink"}
                        strain_record_details['Country'] = content
            _value = []
            for sector in ('state', 'municipality', 'site'):
                sector_val = getattr(value, sector, None)
                if sector_val:
                    _value.append(sector_val)
            value = "; ".join(_value) if _value else None
            if value is None:
                continue

        elif label == "Ontobiotope":
            if client and value:
                onto = get_remote_rlink(client, ONTOBIOTOPE_WS, value)
                value = [onto] if onto is not None else None
        elif label == 'Literature':
            if client and value:
                pub_rlinks = []
                for pub in value:
                    rlink = get_remote_rlink(client, BIBLIOGRAPHY_WS, pub.title)
                    if rlink:
                        pub_rlinks.append(rlink)
                if pub_rlinks:
                    value = pub_rlinks
                else:
                    continue

        elif label == 'Ploidy':
            value = _translate_polidy(value)
        if value is not None:
            content = {"Value": value, "FieldType": biolomics_type}
            strain_record_details[biolomics_field] = content

    # if False:
    #     record_details["Data provided by"] = {
    #         "Value": strain.id.collection, "FieldType": "V"}

    # Markers
    if client:
        add_markers_to_strain_details(client, strain, strain_record_details)

    strain_structure = {"RecordDetails": strain_record_details}
    if update:
        strain_structure['RecordId'] = strain.record_id
        strain_structure['RecordName'] = strain.record_name
    else:
        strain_structure["Acronym"] = "MIRRI"

    return strain_structure


def add_markers_to_strain_details(client, strain: StrainMirri, details):
    for marker in strain.genetics.markers:
        marker_name = marker.marker_id
        marker_in_ws = client.retrieve_by_name(SEQUENCE_WS, marker_name)
        if marker_in_ws is None:
            print('Marker not in web service')
            continue
        marker_type = marker.marker_type
        ws_marker = {
            "Value": [{
                "Name": {"Value": marker_in_ws.record_name,
                         "FieldType": "E"},
                "RecordId": marker_in_ws.record_id
            }],
            "FieldType": "NLink"
        }
        if marker_in_ws.marker_seq:
            ws_marker['Value'][0]["TargetFieldValue"] = {
                "Value": {"Sequence": marker_in_ws.marker_seq},
                "FieldType": "N"
            }

        details[MARKER_TYPE_MAPPING[marker_type]] = ws_marker


def get_remote_rlink(client, endpoint, record_name):
    entity = client.retrieve_by_name(endpoint, record_name)
    if entity:
        # some endpoints do not serialize the json into a python object yet
        try:
            record_name = entity.record_name
            record_id = entity.record_id
        except AttributeError:
            record_name = entity["RecordName"]
            record_id = entity["RecordId"]
        return {"Name": {"Value": record_name, "FieldType": "E"},
                "RecordId": record_id}


def add_strain_rlink_to_entity(record, strain_id, strain_name):
    field_strain = {
        "FieldType": "RLink",
        'Value': [{
            'Name': {'Value': strain_name, 'FieldType': "E"},
            'RecordId': strain_id
        }]
    }
    record['RecordDetails']['Strains'] = field_strain
    return record


PLOIDY_TRANSLATOR = {
    0: 'Aneuploid',
    1: 'Haploid',
    2: 'Diploid',
    3: 'Triploid',
    4: 'Tetraploid',
    9: 'Polyploid'
}

REV_PLOIDY_TRANSLATOR = {v: k for k, v in PLOIDY_TRANSLATOR.items()}


def _translate_polidy(ploidy):
    # print('ploidy in serializer', ploidy)
    try:
        ploidy = int(ploidy)
    except (TypeError, ValueError):
        return '?'
    try:
        ploidy = PLOIDY_TRANSLATOR[ploidy]
    except KeyError:
        ploidy = 'Polyploid'
    return ploidy


def serialize_from_biolomics(biolomics_strain, client=None):  # sourcery no-metrics
    strain = StrainMirri()
    strain.record_id = biolomics_strain.get('RecordId', None)
    strain.record_name = biolomics_strain.get('RecordName', None)
    for field in MIRRI_FIELDS:
        try:
            biolomics_field = field["biolomics"]["field"]
        except KeyError:
            # print(f'biolomics not configured: {field["label"]}')
            continue

        label = field["label"]
        attribute = field["attribute"]
        field_data = biolomics_strain['RecordDetails'].get(biolomics_field, None)
        if field_data is None:
            continue
        is_empty = field_data.get('IsEmpty')
        if is_empty:
            continue
        if biolomics_field in ('Tested temperature growth range', 'Recommended growth temperature'):
            value = {'max': field_data.get('MaxValue', None),
                     'min': field_data.get('MinValue', None)}
        else:
            value = field_data['Value']
        # if value in (None, '', [], {}, '?', 'Unknown', 'nan', 'NaN'):
        #     continue

        # print(label, attribute, biolomics_field, value)

        if label == 'Accession number':
            number = strain.record_name
            mirri_id = StrainId(number=number)
            strain.synonyms = [mirri_id]
            coll, num = value.split(' ', 1)
            accession_number_id = StrainId(collection=coll, number=num)
            strain.id = accession_number_id
            continue
        elif label == "Restrictions on use":
            value = REV_RESTRICTION_USE_TRANSLATOR[value]
        elif label == 'Nagoya protocol restrictions and compliance conditions':
            value = REV_NAGOYA_TRANSLATOR[value]
        elif label in FILE_TYPE_FIELDS:
            value = [f['Value'] for f in value]
        elif label == "Other culture collection numbers":
            other_numbers = []
            for on in value.split(";"):
                on = on.strip()
                try:
                    collection, number = on.split(" ", 1)
                except ValueError:
                    collection = None
                    number = on
                _id = StrainId(collection=collection, number=number)
                other_numbers.append(_id)
            value = other_numbers
        elif label in BOOLEAN_TYPE_FIELDS:
            value = value == 'yes'
        elif label == 'GMO':
            value = value == 'Yes'
        elif label == "Organism type":
            organism_types = [OrganismType(item['Name']) for item in value if item['Value'] == 'yes']
            if organism_types:
                value = organism_types
        elif label == 'Taxon name':
            value = ";".join([v['Name']['Value'] for v in value])
            add_taxon_to_strain(strain, value)
            continue

        elif label in DATE_TYPE_FIELDS:
            # date_range = DateRange()
            value = DateRange().strpdate(value)

        elif label in ("Recommended growth temperature",
                       "Tested temperature growth range"):
            if (value['max'] is None or value['max'] == 0 or
                    value['min'] is None or value['min'] == 0):
                continue
        elif label == "Recommended medium for growth":
            value = [v['Name']['Value'] for v in value]
        elif label == "Form of supply":
            value = [item['Name'] for item in value if item['Value'] == 'yes']
        elif label in LIST_TYPES_TO_JOIN:
            value = [v.strip() for v in value.split(";")]
        elif label == "Coordinates of geographic origin":
            if ('Longitude' in value and 'Latitude' in value and
                    isinstance(value['Longitude'], float) and
                    isinstance(value['Latitude'], float)):
                strain.collect.location.longitude = value['Longitude']
                strain.collect.location.latitude = value['Latitude']
                if value.get('Precision', 0) != 0:
                    strain.collect.location.coord_uncertainty = value['Precision']
            continue
        elif label == "Altitude of geographic origin":
            value = float(value)
        elif label == "Geographic origin":
            strain.collect.location.site = value
            continue
        elif label == 'Ontobiotope':
            try:
                value = re.search("(OBT:[0-9]{5,7})", value[0]['Name']['Value']).group()
            except (KeyError, IndexError, AttributeError):
                continue

        elif label == 'Ploidy':
            value = REV_PLOIDY_TRANSLATOR[value]
        elif label == 'Literature':
            if client is not None:
                pubs = []
                for pub in value:
                    pub = client.retrieve_by_id(BIBLIOGRAPHY_WS, pub['RecordId'])
                    pubs.append(pub)
                value = pubs

        rsetattr(strain, attribute, value)
    # fields that are not in the MIRRI_FIELDS list
    # country
    if 'Country' in biolomics_strain['RecordDetails'] and biolomics_strain['RecordDetails']['Country']:
        try:
            country_name = biolomics_strain['RecordDetails']['Country']['Value'][0]['Name']['Value']
            country = get_pycountry(country_name)
            country_3 = country.alpha_3 if country else None
        except (IndexError, KeyError):
            country_3 = None
        if country_3:
            strain.collect.location.country = country_3
    # Markers:
    if client:
        markers = []
        for marker_type, biolomics_marker in MARKER_TYPE_MAPPING.items():
            try:
                marker_value = biolomics_strain['RecordDetails'][biolomics_marker]['Value']
            except KeyError:
                continue
            if not marker_value:
                continue

            for marker in marker_value:
                record_id = marker['RecordId']
                marker = client.retrieve_by_id(SEQUENCE_WS, record_id)
                if marker is not None:
                    markers.append(marker)
        if markers:
            strain.genetics.markers = markers

    return strain


def get_country_record(country, client):
    for attr in ('common_name', 'name', 'official_name'):
        val = getattr(country, attr, None)
        if val is not None:
            _value = get_remote_rlink(client, COUNTRY_WS, val)
            if _value is not None:
                return _value
    return None
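A quick sanity sketch of the ploidy helpers above; the module path is assumed and _translate_polidy is a private helper of this file.

from mirri.biolomics.serializers.strain import (_translate_polidy,
                                                REV_PLOIDY_TRANSLATOR)  # assumed path

assert _translate_polidy(2) == 'Diploid'
assert _translate_polidy(None) == '?'        # non-numeric input
assert _translate_polidy(7) == 'Polyploid'   # unmapped counts collapse to Polyploid
assert REV_PLOIDY_TRANSLATOR['Haploid'] == 1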
64
mirri/biolomics/serializers/taxonomy.py
Normal file
@ -0,0 +1,64 @@
from mirri.entities.strain import Taxonomy

# TODO this is all wrong, needs deep revision


class TaxonomyMirri(Taxonomy):
    fields = ['record_id', 'record_name', 'acronym', 'full_description',
              'ingredients', 'description', 'other_name', 'ph',
              'sterilization_conditions']

    def __init__(self, **kwargs):
        self._data = {}
        for field in self.fields:
            if field in kwargs and kwargs[field] is not None:
                value = kwargs[field]
                setattr(self, field, value)

    def __setattr__(self, attr, value):
        if attr == '_data':
            super().__setattr__(attr, value)
            return
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        self._data[attr] = value

    def __getattr__(self, attr):
        if attr == '_data':
            raise AttributeError(attr)
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        return self._data.get(attr, None)

    def dict(self):
        return self._data


def serialize_from_biolomics(ws_data, client=None) -> TaxonomyMirri:
    return ws_data
    # The code below is unreachable: it is a leftover copied from the growth
    # medium serializer, kept only until the revision flagged above.
    medium = GrowthMedium()
    medium.record_name = ws_data.get('RecordName', None)
    medium.record_id = ws_data.get('RecordId', None)
    for key, value in ws_data['RecordDetails'].items():
        value = value['Value']
        if not value:
            continue

        if key == "Full description":
            medium.full_description = value
        if key == "Ingredients":
            medium.ingredients = value
        if key == 'Medium description':
            medium.description = value
        if key == 'Other name':
            medium.other_name = value
        if key == 'pH':
            medium.ph = value
        if key == 'Sterilization conditions':
            medium.sterilization_conditions = value

    return medium
373
mirri/biolomics/settings.py
Normal file
@ -0,0 +1,373 @@
try:
    from mirri.biolomics.secrets import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
except ImportError:
    raise ImportError(
        'You need a secrets.py in the project dir with CLIENT_ID, SECRET_ID, USERNAME and PASSWORD')

MIRRI_FIELDS = [
    {
        "attribute": "id",
        "label": "Accession number",
        "mandatory": True,
        "biolomics": {"field": "Collection accession number", "type": "E"},
    },
    {
        "attribute": "restriction_on_use",
        "label": "Restrictions on use",
        "mandatory": True,
        "biolomics": {"field": "Restrictions on use", "type": "T"},
    },
    {
        "attribute": "nagoya_protocol",
        "label": "Nagoya protocol restrictions and compliance conditions",
        "mandatory": True,
        "biolomics": {"field": "Nagoya protocol restrictions and compliance conditions", "type": "T"},
    },
    {
        "attribute": "abs_related_files",
        "label": "ABS related files",
        "mandatory": False,
        "biolomics": {"field": "ABS related files", "type": "U"},
    },
    {
        "attribute": "mta_files",
        "label": "MTA file",
        "mandatory": False,
        "biolomics": {"field": "MTA files URL", "type": "U"},
    },
    {
        "attribute": "other_numbers",
        "label": "Other culture collection numbers",
        "mandatory": False,
        "biolomics": {"field": "Other culture collection numbers", "type": "E"},
    },
    {
        "attribute": "is_from_registered_collection",
        "label": "Strain from a registered collection",
        "mandatory": False,
        "biolomics": {"field": "Strain from a registered collection", "type": "T"},
    },
    {
        "attribute": "risk_group",
        "label": "Risk Group",
        "mandatory": True,
        "biolomics": {"field": "Risk group", "type": "T"},
    },
    {
        "attribute": "is_potentially_harmful",
        "label": "Dual use",
        "mandatory": False,
        "biolomics": {"field": "Dual use", "type": "T"},
    },
    {
        "attribute": "is_subject_to_quarantine",
        "label": "Quarantine in Europe",
        "mandatory": False,
        "biolomics": {"field": "Quarantine in Europe", "type": "T"},
    },
    {
        "attribute": "taxonomy.organism_type",
        "label": "Organism type",
        "mandatory": True,
        "biolomics": {"field": "Organism type", "type": "C"},
    },
    {
        "attribute": "taxonomy.long_name",
        "label": "Taxon name",
        "mandatory": True,
        "biolomics": {"field": "Taxon name", "type": "SynLink"},
    },
    {
        "attribute": "taxonomy.infrasubspecific_name",
        "label": "Infrasubspecific names",
        "mandatory": False,
        "biolomics": {"field": "Infrasubspecific names", "type": "E"},
    },
    {
        "attribute": "taxonomy.comments",
        "label": "Comment on taxonomy",
        "mandatory": False,
        "biolomics": {"field": "Comment on taxonomy", "type": "E"},
    },
    {
        "attribute": "taxonomy.interspecific_hybrid",
        "label": "Interspecific hybrid",
        "mandatory": False,
        "biolomics": {"field": "Interspecific hybrid", "type": "T"},
    },
    {
        "attribute": "status", "label": "Status", "mandatory": False,
        "biolomics": {"field": "Status", "type": "E"},
    },
    {
        "attribute": "history",
        "label": "History of deposit",
        "mandatory": False,
        "biolomics": {"field": "History", "type": "E"},
    },
    {
        "attribute": "deposit.who",
        "label": "Depositor",
        "mandatory": False,
        "biolomics": {"field": "Depositor", "type": "E"},
    },
    {
        "attribute": "deposit.date",
        "label": "Date of deposit",
        "mandatory": False,
        "biolomics": {"field": "Deposit date", "type": "H"},
    },
    {
        "attribute": "catalog_inclusion_date",
        "label": "Date of inclusion in the catalogue",
        "mandatory": False,
        "biolomics": {"field": "Date of inclusion in the catalogue", "type": "H"},
    },
    {
        "attribute": "collect.who",
        "label": "Collected by",
        "mandatory": False,
        "biolomics": {"field": "Collector", "type": "E"},
    },
    {
        "attribute": "collect.date",
        "label": "Date of collection",
        "mandatory": False,
        "biolomics": {"field": "Collection date", "type": "H"},
    },
    {
        "attribute": "isolation.who",
        "label": "Isolated by",
        "mandatory": False,
        "biolomics": {"field": "Isolator", "type": "E"},
    },
    {
        "attribute": "isolation.date",
        "label": "Date of isolation",
        "mandatory": False,
        "biolomics": {"field": "Isolation date", "type": "H"},
    },
    {
        "attribute": "isolation.substrate_host_of_isolation",
        "label": "Substrate/host of isolation",
        "mandatory": False,
        "biolomics": {"field": "Substrate of isolation", "type": "E"},
    },
    {
        "attribute": "growth.tested_temp_range",
        "label": "Tested temperature growth range",
        "mandatory": False,
        "biolomics": {"field": "Tested temperature growth range", "type": "S"},
    },
    {
        "attribute": "growth.recommended_temp",
        "label": "Recommended growth temperature",
        "mandatory": True,
        "biolomics": {"field": "Recommended growth temperature", "type": "S"},
    },
    {
        "attribute": "growth.recommended_media",
        "label": "Recommended medium for growth",
        "mandatory": True,
        "biolomics": {"field": "Recommended growth medium", "type": "RLink"},
    },
    {
        "attribute": "form_of_supply",
        "label": "Form of supply",
        "mandatory": True,
        "biolomics": {"field": "Form", "type": "C"},
    },
    {
        "attribute": "other_denominations",
        "label": "Other denomination",
        "mandatory": False,
        "biolomics": {"field": "Other denomination", "type": "E"},
    },
    {
        # here we use latitude to check if there is data in some of the fields
        "attribute": "collect.location.latitude",
        "label": "Coordinates of geographic origin",
        "mandatory": False,
        "biolomics": {"field": "Coordinates of geographic origin", "type": "L"},
    },
    {
        "attribute": "collect.location.altitude",
        "label": "Altitude of geographic origin",
        "mandatory": False,
        "biolomics": {"field": "Altitude of geographic origin", "type": "D"},
    },
    {
        "attribute": "collect.location",
        "label": "Geographic origin",
        "mandatory": True,
        "biolomics": {"field": "Geographic origin", "type": "E"},
    },
    {
        "attribute": "collect.habitat",
        "label": "Isolation habitat",
        "mandatory": False,
        "biolomics": {"field": "Isolation habitat", "type": "E"},
    },
    # {
    #     "attribute": "collect.habitat_ontobiotope",
    #     "label": "Ontobiotope term for the isolation habitat",
    #     "mandatory": False,
    #     "biolomics": {"field": "Ontobiotope term for the isolation habitat", "type": "E"},
    # },
    {
        "attribute": "collect.habitat_ontobiotope",
        "label": "Ontobiotope",
        "mandatory": False,
        "biolomics": {"field": "Ontobiotope", "type": "RLink"},
    },
    {
        "attribute": "genetics.gmo", "label": "GMO", "mandatory": False,
        "biolomics": {"field": "GMO", "type": "V"},
    },
    {
        "attribute": "genetics.gmo_construction",
        "label": "GMO construction information",
        "mandatory": False,
        "biolomics": {"field": "GMO construction information", "type": "E"},
    },
    {
        "attribute": "genetics.mutant_info",
        "label": "Mutant information",
        "mandatory": False,
        "biolomics": {"field": "Mutant information", "type": "E"},
    },
    {
        "attribute": "genetics.genotype",
        "label": "Genotype",
        "mandatory": False,
        "biolomics": {"field": "Genotype", "type": "E"},
    },
    {
        "attribute": "genetics.sexual_state",
        "label": "Sexual state",
        "mandatory": False,
        "biolomics": {"field": "Sexual state", "type": "E"},
    },
    {
        "attribute": "genetics.ploidy",
        "label": "Ploidy",
        "mandatory": False,
        "biolomics": {"field": "Ploidy", "type": "T"},
    },
    {
        "attribute": "genetics.plasmids",
        "label": "Plasmids",
        "mandatory": False,
        "biolomics": {"field": "Plasmids", "type": "E"},
    },
    {
        "attribute": "genetics.plasmids_in_collections",
        "label": "Plasmids collections fields",
        "mandatory": False,
        "biolomics": {"field": "Plasmids collections fields", "type": "E"},
    },
    {
        "attribute": "publications",
        "label": "Literature",
        "mandatory": False,
        "biolomics": {"field": "Literature", "type": "RLink"},
    },
    {
        "attribute": "pathogenicity",
        "label": "Pathogenicity",
        "mandatory": False,
        "biolomics": {"field": "Pathogenicity", "type": "E"},
    },
    {
        "attribute": "enzyme_production",
        "label": "Enzyme production",
        "mandatory": False,
        "biolomics": {"field": "Enzyme production", "type": "E"},
    },
    {
        "attribute": "production_of_metabolites",
        "label": "Production of metabolites",
        "mandatory": False,
        "biolomics": {"field": "Metabolites production", "type": "E"},
    },
    {
        "attribute": "applications",
        "label": "Applications",
        "mandatory": False,
        "biolomics": {"field": "Applications", "type": "E"},
    },
    {
        "attribute": "remarks", "label": "Remarks", "mandatory": False,
        "biolomics": {"field": "Remarks", "type": "E"},
    },
    {
        "attribute": "literature_linked_to_the_sequence_genome",
        "label": "Literature linked to the sequence/genome",
        "mandatory": False,
        # "biolomics": {"field": "MTA files URL", "type": "U"},
    },
]


PUB_MIRRI_FIELDS = [
    {
        "attribute": "pub_id", "mandatory": False,
        "biolomics": {"field": "", "type": "E"},
    },
    {
        "attribute": "pubmed_id", "mandatory": False,
        "biolomics": {"field": "PubMed ID", "type": "E"},
    },
    {
        "attribute": "doi", "mandatory": False,
        "biolomics": {"field": "DOI number", "type": "E"},
    },
    {
        "attribute": "title", "mandatory": False,
        "biolomics": {"field": "Title", "type": "E"},
    },
    {
        "attribute": "authors", "mandatory": False,
        "biolomics": {"field": "Authors", "type": "E"},
    },
    {
        "attribute": "journal", "mandatory": False,
        "biolomics": {"field": "Journal", "type": "E"},
    },
    {
        "attribute": "volume", "mandatory": False,
        "biolomics": {"field": "Volume", "type": "E"},
    },
    {
        "attribute": "issue", "mandatory": False,
        "biolomics": {"field": "Issue", "type": "E"},
    },
    {
        "attribute": "first_page", "mandatory": False,
        "biolomics": {"field": "Page from", "type": "E"},
    },
    {
        "attribute": "last_page", "mandatory": False,
        "biolomics": {"field": "Page to", "type": "E"},
    },
    {
        "attribute": "last_page", "label": "", "mandatory": False,
        "biolomics": {"field": "", "type": "E"},
    },
    {
        "attribute": "last_page", "label": "", "mandatory": False,
        "biolomics": {"field": "", "type": "E"},
    },
    {
        "attribute": "book_title", "label": "", "mandatory": False,
        "biolomics": {"field": "Book title", "type": "E"},
    },
    {
        "attribute": "publisher", "label": "", "mandatory": False,
        "biolomics": {"field": "Publisher", "type": "E"},
    },
    {
        "attribute": "editor", "label": "", "mandatory": False,
        "biolomics": {"field": "Editor(s)", "type": "E"},
    },
]
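The serializers above drive everything from MIRRI_FIELDS; a field's Biolomics name and type can be looked up by its MIRRI label like this. Note the import requires the secrets.py described at the top of this file.

from mirri.biolomics.settings import MIRRI_FIELDS

by_label = {field['label']: field for field in MIRRI_FIELDS if 'label' in field}
taxon_field = by_label['Taxon name']
print(taxon_field['biolomics']['field'])  # Taxon name
print(taxon_field['biolomics']['type'])   # SynLink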
3603
mirri/data/ontobiotopes.csv
Normal file
File diff suppressed because it is too large
0
mirri/entities/__init__.py
Normal file
45
mirri/entities/_private_classes.py
Normal file
@ -0,0 +1,45 @@
class FrozenClass(object):
    __isfrozen = False

    def __setattr__(self, key, value):
        # print(dir(self))
        if self.__isfrozen and not hasattr(self, key):
            msg = f"Can not add {key} to {self.__class__.__name__}. It is not one of its attributes"
            raise TypeError(msg)
        object.__setattr__(self, key, value)

    def _freeze(self):
        self.__isfrozen = True


class _FieldBasedClass(FrozenClass):
    _fields = []

    def __init__(self, data=None, freeze=True):
        self._data = {}
        if data is None:
            data = {}
        for field in self._fields:
            value = data.get(field["label"], None)
            setattr(self, field["attribute"], value)
        if freeze:
            self._freeze()

    def __eq__(self, o: object) -> bool:
        for field in self._fields:
            val1 = getattr(self, field["attribute"], None)
            val2 = getattr(o, field["attribute"], None)
            if val1 != val2:
                return False
        return True

    def __bool__(self):
        return bool(self.dict())

    def dict(self):
        data = {}
        for field in self._fields:
            value = getattr(self, field["attribute"])
            if value is not None:
                data[field["label"]] = value
        return data
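A minimal sketch of how _FieldBasedClass is meant to be subclassed; the Contact entity is hypothetical, for illustration only.

from mirri.entities._private_classes import _FieldBasedClass


class Contact(_FieldBasedClass):
    _fields = [
        {"attribute": "name", "label": "Name"},
        {"attribute": "email", "label": "Email"},
    ]


contact = Contact({"Name": "Jane Doe"})
print(contact.dict())            # {'Name': 'Jane Doe'}
try:
    contact.phone = '555-0100'   # not one of _fields
except TypeError as error:
    print(error)                 # frozen instances reject unknown attributes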
87
mirri/entities/date_range.py
Normal file
@ -0,0 +1,87 @@
from calendar import monthrange
from collections import OrderedDict
from copy import copy
from datetime import date


class DateRange:
    def __init__(self, year=None, month=None, day=None):
        self._year = year
        if month is not None and (month < 1 or month > 12):
            raise ValueError("Month must be between 1 and 12")
        self._month = month
        if day is not None and (day < 1 or day > 31):
            raise ValueError("Day must be between 1 and 31")
        self._day = day

        self._start = None
        self._end = None
        if year or month or day:
            self._create_range()

    def __str__(self):
        _strdate = self.strfdate
        if _strdate is None:
            return ""

        return _strdate

    def __bool__(self):
        return bool(self._year or self._month or self._day)

    def _create_range(self):
        year = self._year
        month = self._month
        day = self._day
        if year and month and day:
            start_date = date(year=year, month=month, day=day)
            end_date = date(year=year, month=month, day=day)
        elif month is None:
            start_date = date(year=year, month=1, day=1)
            end_date = date(year=year, month=12, day=31)
        elif day is None:
            month_last_day = monthrange(year, month)[1]
            start_date = date(year=year, month=month, day=1)
            end_date = date(year=year, month=month, day=month_last_day)

        self._start = start_date
        self._end = end_date

    def strpdate(self, date_str: str):
        date_str = str(date_str)
        orig_date = copy(date_str)
        date_str = date_str.replace("/", "").replace("-", "")
        if len(date_str) > 8:
            msg = f"Malformed date, more characters than expected: {orig_date}"
            raise ValueError(msg)
        year = None
        month = None
        day = None
        if len(date_str) >= 4:
            year = int(date_str[:4])
            if len(date_str) >= 6:
                month = int(date_str[4:6])
                if month < 1 or month > 12:
                    raise ValueError("Month must be between 1 and 12")
                if len(date_str) >= 8:
                    day = int(date_str[6:8])
                    if day is not None and (day < 1 or day > 31):
                        raise ValueError("Day must be between 1 and 31")
        self._year = year
        self._month = month
        self._day = day
        if year or month or day:
            self._create_range()
        return self

    @property
    def strfdate(self):
        year = "----" if self._year is None else f"{self._start.year:04}"
        month = "--" if self._month is None else f"{self._start.month:02}"
        day = "--" if self._day is None else f"{self._start.day:02}"
        _date = str(f"{year}{month}{day}")
        if _date == "--------":
            return None
        return _date

    @property
    def range(self):
        return OrderedDict([("start", self._start), ("end", self._end)])
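A short usage sketch of the incomplete-date handling above: a year-and-month string expands to a range covering the whole month.

from mirri.entities.date_range import DateRange

collected = DateRange().strpdate('2020-06')
print(collected.strfdate)        # 202006--
print(collected.range['start'])  # 2020-06-01
print(collected.range['end'])    # 2020-06-30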
47
mirri/entities/growth_medium.py
Normal file
@ -0,0 +1,47 @@
class GrowthMedium:
    fields = ['record_id', 'record_name', 'acronym', 'full_description',
              'ingredients', 'description', 'other_name', 'ph',
              'sterilization_conditions']

    def __init__(self, **kwargs):
        self._data = {}
        for field in self.fields:
            if field in kwargs and kwargs[field] is not None:
                value = kwargs[field]
                setattr(self, field, value)

    def __setattr__(self, attr, value):
        if attr == '_data':
            super().__setattr__(attr, value)
            return
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        self._data[attr] = value

    def __getattr__(self, attr):
        if attr == '_data':
            raise AttributeError(attr)
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        return self._data.get(attr, None)

    def dict(self):
        return self._data

    def update(self, growth_media, include_fields=None):
        for field in self.fields:
            if include_fields and field in include_fields:
                new_value = getattr(growth_media, field, None)
                actual_value = getattr(self, field, None)
                if new_value is not None and new_value != actual_value:
                    setattr(self, field, new_value)

    def is_equal(self, other, exclude_fields=None):
        exclude_fields = exclude_fields or []
        for field in self.fields:
            if field in exclude_fields:
                continue
            value_of_other = getattr(other, field, None)
            value_of_self = getattr(self, field, None)
            if value_of_self is not None and value_of_self != value_of_other:
                return False
        return True
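A small sketch of update() and is_equal() on two media that differ only in pH; the names and values are made up.

from mirri.entities.growth_medium import GrowthMedium

gm_old = GrowthMedium()
gm_old.record_name = 'Medium 1'  # hypothetical medium
gm_old.ph = 7.0

gm_new = GrowthMedium()
gm_new.record_name = 'Medium 1'
gm_new.ph = 6.5

print(gm_old.is_equal(gm_new, exclude_fields=['ph']))  # True: only ph differs
gm_old.update(gm_new, include_fields=['ph'])
print(gm_old.ph)                                       # 6.5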
170
mirri/entities/location.py
Normal file
@ -0,0 +1,170 @@
from __future__ import annotations
import hashlib
from typing import Union

from mirri.entities._private_classes import _FieldBasedClass
from mirri.settings import (
    ALTITUDE,
    COORD_SPATIAL_REFERENCE,
    COORDUNCERTAINTY,
    COUNTRY,
    GEOREF_METHOD,
    ISLAND,
    LATITUDE,
    LONGITUDE,
    MUNICIPALITY,
    OTHER,
    PROVINCE,
    SITE,
    STATE,
)
import pycountry


class Location(_FieldBasedClass):
    _fields = [
        {"attribute": "country", "label": COUNTRY},
        {"attribute": "state", "label": STATE},
        {"attribute": "province", "label": PROVINCE},
        {"attribute": "municipality", "label": MUNICIPALITY},
        {"attribute": "site", "label": SITE},
        {"attribute": "other", "label": OTHER},
        {"attribute": "island", "label": ISLAND},
        {"attribute": "longitude", "label": LONGITUDE},
        {"attribute": "latitude", "label": LATITUDE},
        {"attribute": "altitude", "label": ALTITUDE},
        {"attribute": "coord_spatial_reference", "label": COORD_SPATIAL_REFERENCE},
        {"attribute": "coord_uncertainty", "label": COORDUNCERTAINTY},
        {"attribute": "georef_method", "label": GEOREF_METHOD},
    ]

    def __str__(self):
        _site = []
        if self.country:
            _site.append(self.country)
        if self.province:
            _site.append(self.province)
        if self.site:
            _site.append(self.site)
        if self.municipality:
            _site.append(self.municipality)

        return ": ".join(_site)

    def __hash__(self):
        hash_str = ''
        for field in self._fields:
            value = str(getattr(self, field["attribute"], None))
            hash_str += value
        # hash_str = str(self.country) + str(self.province) + str(self.municipality) + str(self.site)
        return int(hashlib.sha1(hash_str.encode("utf-8")).hexdigest(), 16) % (10 ** 8)

    @property
    def country(self) -> Union[str, None]:
        return self._data.get(COUNTRY, None)

    @country.setter
    def country(self, code3: str):
        if code3 is not None:
            _country = pycountry.countries.get(alpha_3=code3)
            if _country is None:
                _country = pycountry.historic_countries.get(alpha_3=code3)
            if _country is None and code3 != 'INW':
                raise ValueError(f'{code3}, not a valid 3 letter country name')
        self._data[COUNTRY] = code3

    @property
    def province(self) -> Union[str, None]:
        return self._data.get(PROVINCE, None)

    @province.setter
    def province(self, code3: str):
        self._data[PROVINCE] = code3

    @property
    def municipality(self) -> Union[str, None]:
        return self._data.get(MUNICIPALITY, None)

    @municipality.setter
    def municipality(self, name: str):
        self._data[MUNICIPALITY] = name

    @property
    def site(self) -> Union[str, None]:
        return self._data.get(SITE, None)

    @site.setter
    def site(self, name: str):
        self._data[SITE] = name

    @property
    def latitude(self):
        return self._data.get(LATITUDE, None)

    @latitude.setter
    def latitude(self, latitude: float):
        self._data[LATITUDE] = latitude

    @property
    def longitude(self) -> Union[float, None]:
        return self._data.get(LONGITUDE, None)

    @longitude.setter
    def longitude(self, longitude: float):
        self._data[LONGITUDE] = longitude

    @property
    def altitude(self) -> Union[int, float, None]:
        return self._data.get(ALTITUDE, None)

    @altitude.setter
    def altitude(self, altitude: Union[int, float]):
        self._data[ALTITUDE] = altitude

    @property
    def georef_method(self) -> Union[str, None]:
        return self._data.get(GEOREF_METHOD, None)

    @georef_method.setter
    def georef_method(self, georef_method: str):
        self._data[GEOREF_METHOD] = georef_method

    @property
    def coord_uncertainty(self) -> Union[str, None]:
        return self._data.get(COORDUNCERTAINTY, None)

    @coord_uncertainty.setter
    def coord_uncertainty(self, coord_uncertainty: str):
        self._data[COORDUNCERTAINTY] = coord_uncertainty

    @property
    def coord_spatial_reference(self) -> Union[str, None]:
        return self._data.get(COORD_SPATIAL_REFERENCE, None)

    @coord_spatial_reference.setter
    def coord_spatial_reference(self, coord_spatial_reference: str):
        self._data[COORD_SPATIAL_REFERENCE] = coord_spatial_reference

    @property
    def state(self) -> Union[str, None]:
        return self._data.get(STATE, None)

    @state.setter
    def state(self, state):
        self._data[STATE] = state

    @property
    def island(self) -> Union[str, None]:
        return self._data.get(ISLAND, None)

    @island.setter
    def island(self, island):
        self._data[ISLAND] = island

    @property
    def other(self) -> Union[str, None]:
        return self._data.get(OTHER, None)

    @other.setter
    def other(self, other):
        self._data[OTHER] = other
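A brief usage sketch of Location; the place names are illustrative.

from mirri.entities.location import Location

location = Location()
location.country = 'PRT'   # must be a valid ISO alpha-3 code
location.municipality = 'Braga'
print(str(location))       # PRT: Braga
print(location.dict())     # keyed by the labels from mirri.settings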
202
mirri/entities/publication.py
Normal file
@ -0,0 +1,202 @@
from mirri.settings import (BOOK_EDITOR, BOOK_PUBLISHER, BOOK_TITLE,
                            PUB_AUTHORS, PUB_DOI, PUB_FIRST_PAGE, PUB_ID,
                            PUB_ISSUE, PUB_JOURNAL, PUB_LAST_PAGE,
                            PUB_PUBMED_ID, PUB_TITLE, PUB_VOLUME)

# Maybe we could implement some crossref calls to fill all field data
# and get the DOI wherever possible

RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'


class Publication:
    def __init__(self, data=None):
        self._data = {}
        if data:
            self.record_id = data.get('RecordId', None)
            self.record_name = data.get('RecordName', None)
            self.pubmed_id = data.get(PUB_PUBMED_ID, None)
            self.doi = data.get(PUB_DOI, None)
            self.title = data.get(PUB_TITLE, None)
            self.authors = data.get(PUB_AUTHORS, None)
            self.journal = data.get(PUB_JOURNAL, None)
            self.volume = data.get(PUB_VOLUME, None)
            self.issue = data.get(PUB_ISSUE, None)
            self.first_page = data.get(PUB_FIRST_PAGE, None)
            self.last_page = data.get(PUB_LAST_PAGE, None)
            self.editors = data.get(BOOK_EDITOR, None)
            self.publisher = data.get(BOOK_PUBLISHER, None)
            self.book_title = data.get(BOOK_TITLE, None)
            self.isbn = data.get('ISBN', None)
            self.issn = data.get('ISSN', None)
            self.year = data.get('Year', None)

    def __bool__(self):
        return bool(self._data)

    def dict(self):
        return self._data

    @property
    def id(self) -> int:
        return self._data.get(PUB_ID, None)

    @id.setter
    def id(self, value: int):
        if value is not None:
            self._data[PUB_ID] = value

    @property
    def record_id(self) -> int:
        return self._data.get(RECORD_ID, None)

    @record_id.setter
    def record_id(self, value: int):
        if value is not None:
            self._data[RECORD_ID] = value

    @property
    def record_name(self) -> str:
        return self._data.get(RECORD_NAME, None)

    @record_name.setter
    def record_name(self, value: str):
        if value is not None:
            self._data[RECORD_NAME] = value

    @property
    def pubmed_id(self):
        return self._data.get(PUB_PUBMED_ID, None)

    @pubmed_id.setter
    def pubmed_id(self, value: str):
        if value is not None:
            self._data[PUB_PUBMED_ID] = value

    @property
    def isbn(self):
        return self._data.get('ISBN', None)

    @isbn.setter
    def isbn(self, value: str):
        if value is not None:
            self._data['ISBN'] = value

    @property
    def issn(self):
        return self._data.get('ISSN', None)

    @issn.setter
    def issn(self, value: str):
        if value is not None:
            self._data['ISSN'] = value

    @property
    def doi(self):
        return self._data.get(PUB_DOI, None)

    @doi.setter
    def doi(self, value: str):
        if value is not None:
            self._data[PUB_DOI] = value

    @property
    def title(self):
        return self._data.get(PUB_TITLE, None)

    @title.setter
    def title(self, value: str):
        if value is not None:
            self._data[PUB_TITLE] = value
            self._data[RECORD_NAME] = value

    @property
    def authors(self):
        return self._data.get(PUB_AUTHORS, None)

    @authors.setter
    def authors(self, value: str):
        if value is not None:
            self._data[PUB_AUTHORS] = value

    @property
    def journal(self):
        return self._data.get(PUB_JOURNAL, None)

    @journal.setter
    def journal(self, value: str):
        if value is not None:
            self._data[PUB_JOURNAL] = value

    @property
    def volume(self):
        return self._data.get(PUB_VOLUME, None)

    @volume.setter
    def volume(self, value: str):
        if value is not None:
            self._data[PUB_VOLUME] = value

    @property
    def issue(self):
        return self._data.get(PUB_ISSUE, None)

    @issue.setter
    def issue(self, value: str):
        if value is not None:
            self._data[PUB_ISSUE] = value

    @property
    def first_page(self):
        return self._data.get(PUB_FIRST_PAGE, None)

    @first_page.setter
    def first_page(self, value: str):
        if value is not None:
            self._data[PUB_FIRST_PAGE] = value

    @property
    def last_page(self):
        return self._data.get(PUB_LAST_PAGE, None)

    @last_page.setter
    def last_page(self, value: str):
        if value is not None:
            self._data[PUB_LAST_PAGE] = value

    @property
    def book_title(self):
        return self._data.get(BOOK_TITLE, None)

    @book_title.setter
    def book_title(self, value: str):
        if value is not None:
            self._data[BOOK_TITLE] = value

    @property
    def editors(self):
        return self._data.get(BOOK_EDITOR, None)

    @editors.setter
    def editors(self, value: str):
        if value is not None:
            self._data[BOOK_EDITOR] = value

    @property
    def publisher(self):
        return self._data.get(BOOK_PUBLISHER, None)

    @publisher.setter
    def publisher(self, value: str):
        if value is not None:
            self._data[BOOK_PUBLISHER] = value

    @property
    def year(self) -> int:
        return self._data.get('Year', None)

    @year.setter
    def year(self, value: int):
        if value is not None:
            self._data['Year'] = value
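A minimal sketch of building a Publication by hand; the title and DOI are invented, not real references.

from mirri.entities.publication import Publication

pub = Publication()
pub.title = 'A hypothetical paper on strain preservation'
pub.doi = '10.1000/example'  # illustrative DOI
pub.year = 2021
print(pub.record_name)       # setting the title also fills RecordName
print(bool(pub))             # True once any field is set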
45
mirri/entities/sequence.py
Normal file
@ -0,0 +1,45 @@
from mirri.entities._private_classes import _FieldBasedClass
from mirri.settings import (
    ALLOWED_MARKER_TYPES,
    MARKER_INSDC,
    MARKER_SEQ,
    MARKER_TYPE)

from mirri import ValidationError


class GenomicSequence(_FieldBasedClass):
    _fields = [
        {"attribute": "marker_type", "label": MARKER_TYPE},
        {"attribute": "marker_id", "label": MARKER_INSDC},
        {"attribute": "marker_seq", "label": MARKER_SEQ},
    ]

    @property
    def marker_type(self):
        return self._data.get(MARKER_TYPE, None)

    @marker_type.setter
    def marker_type(self, value: str):
        if value is not None:
            # Compare against the list of acronyms: a substring test on a
            # joined string would wrongly accept partial matches.
            types = [m["acronym"] for m in ALLOWED_MARKER_TYPES]
            if value not in types:
                msg = f"{value} not in allowed marker types: {' '.join(types)}"
                raise ValidationError(msg)
            self._data[MARKER_TYPE] = value

    @property
    def marker_id(self) -> str:
        return self._data.get(MARKER_INSDC, None)

    @marker_id.setter
    def marker_id(self, value: str):
        self._data[MARKER_INSDC] = value

    @property
    def marker_seq(self) -> str:
        return self._data.get(MARKER_SEQ, None)

    @marker_seq.setter
    def marker_seq(self, value: str):
        self._data[MARKER_SEQ] = value
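
A quick sketch of how GenomicSequence is meant to be used (hedged: 'ITS' is one of the acronyms in ALLOWED_MARKER_TYPES; the accession and sequence values are illustrative):

seq = GenomicSequence()
seq.marker_type = 'ITS'        # validated against ALLOWED_MARKER_TYPES
seq.marker_id = 'AB123456'     # INSDC accession, illustrative
seq.marker_seq = 'ACGT'        # illustrative sequence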
1243
mirri/entities/strain.py
Normal file
File diff suppressed because it is too large
0
mirri/io/__init__.py
Normal file
0
mirri/io/parsers/__init__.py
Normal file
79
mirri/io/parsers/excel.py
Normal file
@@ -0,0 +1,79 @@
from io import BytesIO
from openpyxl import load_workbook


def excel_dict_reader(fhand, sheet_name, mandatory_column_name=None):
    fhand.seek(0)
    wb = load_workbook(filename=BytesIO(fhand.read()), data_only=True,
                       read_only=True)
    return workbook_sheet_reader(wb, sheet_name,
                                 mandatory_column_name=mandatory_column_name)


def is_none(value):
    return value is None


def workbook_sheet_reader(workbook, sheet_name, mandatory_column_name=None,
                          allowed_empty_line_slots=5):
    try:
        sheet = workbook[sheet_name]
    except KeyError as error:
        raise ValueError(f"The '{sheet_name}' sheet is missing.") from error

    first = True
    header = []
    empty_lines = 0
    for row in sheet.rows:
        values = []
        for cell in row:
            if cell.value is not None and cell.data_type == 's':
                value = str(cell.value).strip()
            else:
                value = cell.value
            values.append(value)
        if first:
            header = values
            first = False
            continue
        if not any(values):
            empty_lines += 1
            if empty_lines >= allowed_empty_line_slots:
                break
            continue
        empty_lines = 0

        data = dict(zip(header, values))
        if mandatory_column_name is not None and not data[mandatory_column_name]:
            # Skip rows whose mandatory column is empty; these are usually
            # trailing, partially formatted lines left in the sheet.
            continue
        yield data


def get_all_cell_data_from_sheet(workbook, sheet_name, allowed_empty_line_slots=5):
    try:
        sheet = workbook[sheet_name]
    except KeyError as error:
        raise ValueError(f"The '{sheet_name}' sheet is missing.") from error

    empty_lines = 0
    all_values = []
    for row in sheet.rows:
        values = []
        for cell in row:
            if cell.value is not None and cell.data_type == 's':
                value = str(cell.value).strip()
            else:
                value = cell.value
            values.append(value)
        if not any(values):
            empty_lines += 1
            if empty_lines >= allowed_empty_line_slots:
                break
            continue
        empty_lines = 0
        all_values.extend(values)
    return all_values
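
A usage sketch for the reader above (hedged: the file name is illustrative; the sheet and column names follow the MIRRI template conventions used in the parsers below):

with open('mirri_template.xlsx', 'rb') as fhand:
    for row in excel_dict_reader(fhand, 'Strains',
                                 mandatory_column_name='Accession number'):
        print(row['Accession number'])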
276
mirri/io/parsers/mirri_excel.py
Normal file
@@ -0,0 +1,276 @@
import re
from datetime import date
from io import BytesIO

import pycountry
from openpyxl import load_workbook

from mirri import rsetattr, ValidationError
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.growth_medium import GrowthMedium
from mirri.io.parsers.excel import workbook_sheet_reader
from mirri.entities.publication import Publication
from mirri.entities.date_range import DateRange
from mirri.entities.strain import OrganismType, StrainId, add_taxon_to_strain
from mirri.settings import (COMMERCIAL_USE_WITH_AGREEMENT, GENOMIC_INFO,
                            GROWTH_MEDIA, LITERATURE_SHEET, LOCATIONS,
                            MIRRI_FIELDS, NAGOYA_DOCS_AVAILABLE, NAGOYA_NO_RESTRICTIONS,
                            NAGOYA_PROBABLY_SCOPE, NO_RESTRICTION,
                            ONLY_RESEARCH, ONTOBIOTOPE,
                            PUBLICATION_FIELDS, STRAINS, SUBTAXAS)
from mirri.utils import get_country_from_name

RESTRICTION_USE_TRANSLATOR = {
    1: NO_RESTRICTION,
    2: ONLY_RESEARCH,
    3: COMMERCIAL_USE_WITH_AGREEMENT,
}
NAGOYA_TRANSLATOR = {
    1: NAGOYA_NO_RESTRICTIONS,
    2: NAGOYA_DOCS_AVAILABLE,
    3: NAGOYA_PROBABLY_SCOPE,
}
TRUEFALSE_TRANSLATOR = {
    1: False,
    2: True
}


def parse_mirri_excel(fhand, version="20200601"):
    if version == "20200601":
        return _parse_mirri_v20200601(fhand)
    else:
        raise NotImplementedError("Only version 20200601 is implemented")


def _parse_mirri_v20200601(fhand):
    fhand.seek(0)
    file_content = BytesIO(fhand.read())
    wb = load_workbook(filename=file_content, read_only=True, data_only=True)

    locations = workbook_sheet_reader(wb, LOCATIONS)
    ontobiotopes = workbook_sheet_reader(wb, ONTOBIOTOPE)

    growth_media = list(parse_growth_media(wb))

    markers = workbook_sheet_reader(wb, GENOMIC_INFO)

    publications = list(parse_publications(wb))

    strains = parse_strains(wb, locations=locations, growth_media=growth_media,
                            markers=markers, publications=publications,
                            ontobiotopes=ontobiotopes)

    return {"strains": strains, "growth_media": growth_media}
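
A usage sketch for the parser entry point (hedged: the file name is illustrative; note that 'strains' is a lazy generator, consumed here just for illustration):

with open('mirri_template.xlsx', 'rb') as fhand:
    parsed = parse_mirri_excel(fhand, version="20200601")
    for strain in parsed['strains']:
        print(strain.id.strain_id)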
def index_list_by(list_, id_):
    return {str(item[id_]): item for item in list_}


def index_list_by_attr(list_, id_):
    return {str(getattr(item, id_)): item for item in list_}


def index_markers(markers):
    indexed_markers = {}
    for marker in markers:
        strain_id = marker["Strain AN"]
        if strain_id not in indexed_markers:
            indexed_markers[strain_id] = []
        indexed_markers[strain_id].append(marker)
    return indexed_markers


def remove_hard_lines(string=None):
    if string is not None and string != '':
        return re.sub(r'\r+\n+|\t+', '', string).strip()
    else:
        return None


def parse_growth_media(wb):
    for row in workbook_sheet_reader(wb, GROWTH_MEDIA):
        gm = GrowthMedium()
        gm.acronym = str(row['Acronym'])
        gm.description = row['Description']
        gm.full_description = remove_hard_lines(row.get('Full description', None))

        yield gm


def parse_publications(wb):
    for row in workbook_sheet_reader(wb, LITERATURE_SHEET):
        pub = Publication()
        for pub_field in PUBLICATION_FIELDS:
            label = pub_field["label"]
            col_val = row.get(label, None)

            if col_val:
                attribute = pub_field["attribute"]
                setattr(pub, attribute, col_val)
        yield pub
def parse_strains(wb, locations, growth_media, markers, publications,
                  ontobiotopes):

    ontobiotopes_by_id = {str(ont["ID"]): ont['Name'] for ont in ontobiotopes}
    ontobiotopes_by_name = {v: k for k, v in ontobiotopes_by_id.items()}

    locations = index_list_by(locations, 'Locality')
    growth_media = index_list_by_attr(growth_media, 'acronym')
    publications = index_list_by_attr(publications, 'id')
    markers = index_markers(markers)

    for strain_row in workbook_sheet_reader(wb, STRAINS, "Accession number"):
        strain = StrainMirri()
        strain_id = None
        label = None
        for field in MIRRI_FIELDS:
            label = field["label"]
            attribute = field["attribute"]
            value = strain_row[label]
            if value is None or value == '':
                continue

            if attribute == "id":
                # Keep the raw accession number so later error messages can
                # reference it.
                strain_id = value
                collection, number = value.split(" ", 1)
                value = StrainId(collection=collection, number=number)
                rsetattr(strain, attribute, value)

            elif attribute == "restriction_on_use":
                rsetattr(strain, attribute, RESTRICTION_USE_TRANSLATOR[value])
            elif attribute == "nagoya_protocol":
                rsetattr(strain, attribute, NAGOYA_TRANSLATOR[value])
            elif attribute == "other_numbers":
                other_numbers = []
                for on in value.split(";"):
                    on = on.strip()
                    try:
                        collection, number = on.split(" ", 1)
                    except ValueError:
                        collection = None
                        number = on
                    _id = StrainId(collection=collection, number=number)
                    other_numbers.append(_id)
                rsetattr(strain, attribute, other_numbers)
            elif attribute == "taxonomy.taxon_name":
                try:
                    add_taxon_to_strain(strain, value)
                except ValueError:
                    msg = f"The '{label}' for strain with Accession Number {strain_id} is not according to the specification."
                    raise ValidationError(msg)
            elif attribute == "taxonomy.organism_type":
                value = [OrganismType(val.strip())
                         for val in str(value).split(";")]
                rsetattr(strain, attribute, value)
            elif attribute in ("deposit.date", "collect.date", "isolation.date",
                               "catalog_inclusion_date"):
                if isinstance(value, date):
                    value = DateRange(
                        year=value.year, month=value.month, day=value.day
                    )
                elif isinstance(value, str):
                    value = DateRange().strpdate(value)
                else:
                    raise NotImplementedError()
                rsetattr(strain, attribute, value)
            elif attribute == 'growth.recommended_temp':
                temps = value.split(';')
                if len(temps) == 1:
                    _min, _max = float(temps[0]), float(temps[0])
                else:
                    _min, _max = float(temps[0]), float(temps[1])
                rsetattr(strain, attribute, {'min': _min, 'max': _max})
            elif attribute == "growth.recommended_media":
                sep = ";" if ";" in value else "/"
                # Use a local name to avoid clobbering the growth_media index.
                media_list = [v.strip() for v in value.split(sep)]
                rsetattr(strain, attribute, media_list)
            elif attribute == 'growth.tested_temp_range':
                if value:
                    min_, max_ = value.split(";")
                    value = {'min': float(min_), 'max': float(max_)}
                    rsetattr(strain, attribute, value)
            elif attribute == "form_of_supply":
                rsetattr(strain, attribute, value.split(";"))
            elif attribute == "collect.location.coords":
                items = value.split(";")
                strain.collect.location.latitude = float(items[0])
                strain.collect.location.longitude = float(items[1])
                if len(items) > 2:
                    strain.collect.location.coord_uncertainty = items[2]

            elif attribute == "collect.location":
                location = locations[value]
                if 'Country' in location and location['Country']:
                    if location['Country'] == 'Unknown':
                        continue
                    country_3 = _get_country_alpha3(location['Country'])
                    strain.collect.location.country = country_3
                strain.collect.location.state = location["Region"]
                strain.collect.location.municipality = location["City"]
                strain.collect.location.site = location["Locality"]
            elif attribute in ("abs_related_files", "mta_files"):
                rsetattr(strain, attribute, value.split(";"))
            elif attribute in ("is_from_registered_collection",
                               "is_subject_to_quarantine", 'taxonomy.interspecific_hybrid',
                               "is_potentially_harmful", "genetics.gmo"):
                rsetattr(strain, attribute, TRUEFALSE_TRANSLATOR[value])
            elif attribute == "publications":
                value = str(value)
                pubs = []
                pub_ids = [v.strip() for v in value.split(";")]
                for pub_id in pub_ids:
                    pub = publications.get(pub_id, None)
                    if pub is None:
                        pub = Publication()
                        if '/' in pub_id:
                            pub.doi = pub_id
                        else:
                            pub.pubmed_id = pub_id
                    pubs.append(pub)
                rsetattr(strain, attribute, pubs)
            elif attribute == 'ontobiotope':
                values = []
                for val in value.split(';'):
                    if val not in ontobiotopes_by_id:
                        val = ontobiotopes_by_name[val]
                    values.append(val)
                # Store the normalized ontobiotope IDs, not the raw cell value.
                rsetattr(strain, attribute, values)
            elif attribute == 'other_denominations':
                value = [v.strip() for v in value.split(';')]
                rsetattr(strain, attribute, value)
            elif attribute == 'genetics.plasmids':
                value = [v.strip() for v in value.split(';')]
                rsetattr(strain, attribute, value)
            else:
                rsetattr(strain, attribute, value)

        # add markers
        strain_id = strain.id.strain_id
        if strain_id in markers:
            for marker in markers[strain_id]:
                _marker = GenomicSequenceBiolomics()
                _marker.marker_id = marker["INSDC AN"]
                _marker.marker_type = marker["Marker"]
                _marker.marker_seq = marker["Sequence"]
                strain.genetics.markers.append(_marker)
        yield strain
def _get_country_alpha3(loc_country):
    if loc_country == 'INW':
        return loc_country
    country = get_country_from_name(loc_country)
    if not country:
        country = pycountry.countries.get(alpha_3=loc_country)
    if not country:
        country = pycountry.historic_countries.get(alpha_3=loc_country)
    if not country:
        # Fail with a clear message instead of an AttributeError below.
        raise ValueError(f"Unknown country: {loc_country}")
    return country.alpha_3
0
mirri/io/writers/__init__.py
Normal file
305
mirri/io/writers/mirri_excel.py
Normal file
@@ -0,0 +1,305 @@
import csv
from copy import deepcopy
from openpyxl.workbook.workbook import Workbook


from mirri import rgetattr
from mirri.settings import GROWTH_MEDIA, MIRRI_FIELDS, DATA_DIR, PUBLICATION_FIELDS
from mirri.io.parsers.mirri_excel import NAGOYA_TRANSLATOR, RESTRICTION_USE_TRANSLATOR

INITIAL_SEXUAL_STATES = [
    "Mata",
    "Matalpha",
    "Mata/Matalpha",
    "Matb",
    "Mata/Matb",
    "MTLa",
    "MTLalpha",
    "MTLa/MTLalpha",
    "MAT1-1",
    "MAT1-2",
    "MAT1",
    "MAT2",
    "MT+",
    "MT-",
    "H+",
    "H-",
]
MARKER_FIELDS = [
    {"attribute": "acronym", "label": "Acronym", "mandatory": True},
    {"attribute": "marker", "label": "Marker", "mandatory": True},
]
MARKER_DATA = [
    {"acronym": "16S rRNA", "marker": "16S rRNA"},
    {"acronym": "ACT", "marker": "Actin"},
    {"acronym": "CaM", "marker": "Calmodulin"},
    {"acronym": "EF-1α", "marker": "elongation factor 1-alpha (EF-1α)"},
    {"acronym": "ITS", "marker": "nuclear ribosomal Internal Transcribed Spacer (ITS)"},
    {"acronym": "LSU", "marker": "nuclear ribosomal Large SubUnit (LSU)"},
    {"acronym": "RPB1", "marker": "Ribosomal RNA-coding genes RPB1"},
    {"acronym": "RPB2", "marker": "Ribosomal RNA-coding genes RPB2"},
    {"acronym": "TUBB", "marker": "β-Tubulin"},
]

REV_RESTRICTION_USE_TRANSLATOR = {v: k for k, v in RESTRICTION_USE_TRANSLATOR.items()}
REV_NAGOYA_TRANSLATOR = {v: k for k, v in NAGOYA_TRANSLATOR.items()}
PUB_HEADERS = [pb["label"] for pb in PUBLICATION_FIELDS]
def write_mirri_excel(path, strains, growth_media, version):
    if version == "20200601":
        _write_mirri_excel_20200601(path, strains, growth_media)
    else:
        raise NotImplementedError("Only version 20200601 is implemented")


def _write_mirri_excel_20200601(path, strains, growth_media):
    wb = Workbook()

    write_markers_sheet(wb)

    ontobiotope_path = DATA_DIR / "ontobiotopes.csv"
    write_ontobiotopes(wb, ontobiotope_path)

    write_growth_media(wb, growth_media)
    growth_media_indexes = [str(gm.acronym) for gm in growth_media]

    locations = {}
    publications = {}
    sexual_states = set(deepcopy(INITIAL_SEXUAL_STATES))
    genomic_markers = {}
    strains_data = _deserialize_strains(strains, locations, growth_media_indexes,
                                        publications, sexual_states, genomic_markers)
    strains_data = list(strains_data)

    # write the strains first so the indexed data (locations, publications,
    # sexual states and markers) gets populated
    strain_sheet = wb.create_sheet("Strains")
    strain_sheet.append([field["label"] for field in MIRRI_FIELDS])
    for strain_row in strains_data:
        strain_sheet.append(strain_row)
    redimension_cell_width(strain_sheet)

    # write locations
    loc_sheet = wb.create_sheet("Geographic origin")
    loc_sheet.append(["ID", "Country", "Region", "City", "Locality"])
    for index, loc_index in enumerate(locations.keys()):
        location = locations[loc_index]
        row = [index, location.country, location.state, location.municipality,
               loc_index]
        loc_sheet.append(row)
    redimension_cell_width(loc_sheet)

    # write publications
    pub_sheet = wb.create_sheet("Literature")
    pub_sheet.append(PUB_HEADERS)
    for publication in publications.values():
        row = []
        for pub_field in PUBLICATION_FIELDS:
            value = getattr(publication, pub_field['attribute'], None)
            row.append(value)
        pub_sheet.append(row)
    redimension_cell_width(pub_sheet)

    # write sexual states
    sex_sheet = wb.create_sheet("Sexual states")
    for sex_state in sorted(sexual_states):
        sex_sheet.append([sex_state])
    redimension_cell_width(sex_sheet)

    # write genetic markers
    markers_sheet = wb.create_sheet("Genomic information")
    markers_sheet.append(['Strain AN', 'Marker', 'INSDC AN', 'Sequence'])
    for strain_id, markers in genomic_markers.items():
        for marker in markers:
            row = [strain_id, marker.marker_type, marker.marker_id, marker.marker_seq]
            markers_sheet.append(row)
    redimension_cell_width(markers_sheet)

    del wb["Sheet"]
    wb.save(str(path))
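
A usage sketch for the writer (hedged: strains and growth media as produced by parse_mirri_excel above; the file names are illustrative):

with open('input.xlsx', 'rb') as fhand:
    parsed = parse_mirri_excel(fhand, version="20200601")
write_mirri_excel('output.xlsx', parsed['strains'], parsed['growth_media'],
                  version="20200601")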
def _deserialize_strains(strains, locations, growth_media_indexes,
                         publications, sexual_states, genomic_markers):
    for strain in strains:
        strain_row = []
        for field in MIRRI_FIELDS:
            attribute = field["attribute"]

            if attribute == "id":
                value = strain.id.strain_id
            elif attribute == "restriction_on_use":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = REV_RESTRICTION_USE_TRANSLATOR[value]
            elif attribute == "nagoya_protocol":
                value = rgetattr(strain, attribute)
                if value:
                    value = REV_NAGOYA_TRANSLATOR[value]
            elif attribute == "other_numbers":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = [f"{on.collection} {on.number}" for on in value]
                    value = "; ".join(value)
            elif attribute == 'other_denominations':
                od = strain.other_denominations
                value = "; ".join(od) if od else None
            elif attribute in (
                "is_from_registered_collection",
                "is_subject_to_quarantine",
                "is_potentially_harmful",
                "genetics.gmo",
                "taxonomy.interspecific_hybrid"
            ):
                value = rgetattr(strain, attribute)
                if value is True:
                    value = 2
                elif value is False:
                    value = 1
                else:
                    value = None
            elif attribute == "taxonomy.taxon_name":
                value = strain.taxonomy.long_name
            elif attribute in ("deposit.date", "collect.date", "isolation.date",
                               'catalog_inclusion_date'):
                value = rgetattr(strain, attribute)
                value = value.strfdate if value else None
            elif attribute == "growth.recommended_media":
                value = rgetattr(strain, attribute)
                if value is not None:
                    for gm in value:
                        if str(gm) not in growth_media_indexes:
                            msg = f"Growth media {gm} not in the provided ones"
                            raise ValueError(msg)
                    value = "/".join(value)
            elif attribute in ('growth.tested_temp_range',
                               "growth.recommended_temp"):
                value = rgetattr(strain, attribute)
                if value:
                    value = f'{value["min"]}; {value["max"]}'
            elif attribute == "form_of_supply":
                value = rgetattr(strain, attribute)
                value = ";".join(value) if value else None
            elif attribute == "collect.location.coords":
                lat = strain.collect.location.latitude
                long = strain.collect.location.longitude
                if lat is not None and long is not None:
                    value = f"{lat};{long}"
                else:
                    value = None

            elif attribute == "collect.location":
                location = strain.collect.location
                loc_index = _build_location_index(location)
                # Keep the row aligned with the MIRRI_FIELDS header even when
                # there is no location for this strain.
                if loc_index is None:
                    value = None
                else:
                    if loc_index not in locations:
                        locations[loc_index] = location
                    value = loc_index
            elif attribute in ("abs_related_files", "mta_files"):
                value = rgetattr(strain, attribute)
                value = ";".join(value) if value else None
            elif attribute == "taxonomy.organism_type":
                value = rgetattr(strain, attribute)
                if value:
                    value = "; ".join([str(v.code) for v in value])

            elif attribute == "history":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = " < ".join(value)
            elif attribute == "genetics.sexual_state":
                value = rgetattr(strain, attribute)
                if value:
                    sexual_states.add(value)
            elif attribute == "genetics.ploidy":
                value = rgetattr(strain, attribute)
            elif attribute == 'publications':
                value = []
                for pub in strain.publications:
                    value.append(pub.id)
                    if pub.id not in publications:
                        publications[pub.id] = pub
                value = ';'.join(str(v) for v in value) if value else None
            elif attribute == 'genetics.plasmids':
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = ';'.join(value)
            else:
                value = rgetattr(strain, attribute)

            strain_row.append(value)
        genomic_markers[strain.id.strain_id] = strain.genetics.markers
        yield strain_row
def _build_location_index(location):
    index = []
    if location.country:
        index.append(location.country)
    if location.site:
        index.append(location.site)
    return ';'.join(index) if index else None
def write_markers_sheet(wb):
    sheet = wb.create_sheet("Markers")
    _write_work_sheet(
        sheet,
        labels=[f["label"] for f in MARKER_FIELDS],
        attributes=[f["attribute"] for f in MARKER_FIELDS],
        data=MARKER_DATA,
    )
    redimension_cell_width(sheet)


def write_ontobiotopes(workbook, ontobiotype_path):
    ws = workbook.create_sheet("Ontobiotope")
    with ontobiotype_path.open() as fhand:
        for row in csv.reader(fhand, delimiter="\t"):
            ws.append(row)
    redimension_cell_width(ws)


def _write_work_sheet(sheet, labels, attributes, data):
    sheet.append(labels)
    for row in data:
        row_data = [row[field] for field in attributes]
        sheet.append(row_data)

    redimension_cell_width(sheet)


def write_growth_media(wb, growth_media):
    ws = wb.create_sheet(GROWTH_MEDIA)
    ws.append(["Acronym", "Description", "Full description"])
    for growth_medium in growth_media:
        row = [
            growth_medium.acronym,
            growth_medium.description,
            growth_medium.full_description,
        ]
        ws.append(row)
    redimension_cell_width(ws)


def redimension_cell_width(ws):
    dims = {}
    for row in ws.rows:
        for cell in row:
            if cell.value:
                max_ = max((dims.get(cell.column_letter, 0), len(str(cell.value))))
                dims[cell.column_letter] = max_
    for col, value in dims.items():
        ws.column_dimensions[col].width = value
296
mirri/settings.py
Normal file
@@ -0,0 +1,296 @@
from pathlib import Path

DATA_DIR = Path(__file__).parent / "data"

ACCESSION_NUMBER = "accession_number"
RESTRICTION_ON_USE = "restriction_on_use"
NAGOYA_PROTOCOL = "nagoya_protocol"
ABS_RELATED_FILES = "abs_related_files"
MTA_FILES = "mta_file"
OTHER_CULTURE_NUMBERS = "other_culture_collection_numbers"
STRAIN_FROM_REGISTERED_COLLECTION = "strain_from_a_registered_collection"
RISK_GROUP = "risk_group"
DUAL_USE = "dual_use"
QUARANTINE = "quarantine"
ORGANISM_TYPE = "organism_type"
TAXON_NAME = "taxon_name"
INFRASUBSPECIFIC_NAME = "infrasubspecific_names"
COMMENTS_ON_TAXONOMY = "comments_on_taxonomy"
STATUS = "status"
HISTORY_OF_DEPOSIT = "history_of_deposit"
DEPOSITOR = "depositor"
DATE_OF_DEPOSIT = "date_of_deposit"
COLLECTED_BY = "collected_by"
DATE_OF_COLLECTION = "date_of_collection"
ISOLATED_BY = "isolated_by"
DATE_OF_ISOLATION = "date_of_isolation"
DATE_OF_INCLUSION = "date_of_inclusion_on_catalog"
TESTED_TEMPERATURE_GROWTH_RANGE = "tested_temperature_growth_range"
RECOMMENDED_GROWTH_TEMP = "recommended_growth_temperature"
RECOMMENDED_GROWTH_MEDIUM = "recommended_media_for_growth"
FORM_OF_SUPPLY = "form_of_supply"
GEO_COORDS = "coordinates_of_geographic_origin"
ACCESSION_NAME = "other_denomination"
ALTITUDE = "altitude_of_geographic_origin"
GEOGRAPHIC_ORIGIN = "geographic_origin"
GMO = "gmo"
GMO_CONSTRUCTION_INFO = "gmo_construction_information"
MUTANT_INFORMATION = "mutant_information"
GENOTYPE = "genotype"
LITERATURE = "literature"
SEXUAL_STATE = "sexual_state"
PLOIDY = "ploidy"
INTERSPECIFIC_HYBRID = "interspecific_hybrid"
HYBRIDS = 'hybrids'
PLANT_PATHOGENICITY_CODE = "plant_pathogenicity_code"
PATHOGENICITY = "pathogenicity"
ENZYME_PRODUCTION = "enzyme_production"
PRODUCTION_OF_METABOLITES = "production_of_metabolites"
APPLICATIONS = "applications"
REMARKS = "remarks"
PLASMIDS = "plasmids"
PLASMIDS_COLLECTION_FIELDS = "plasmids_collections_fields"
SUBSTRATE_HOST_OF_ISOLATION = "substrate_host_of_isolation"
ISOLATION_HABITAT = "isolation_habitat"
ONTOBIOTOPE_ISOLATION_HABITAT = "ontobiotope_term_for_the_isolation_habitat"
LITERATURE_LINKED_TO_SEQ_GENOME = "literature_linked_to_the_sequence_genome"

# StrainId
STRAIN_ID = "id"
COLLECTION_CODE = "collection_code"
STRAIN_PUI = "strain_pui"
STRAIN_URL = "strain_url"

ID_SYNONYMS = 'id_synonyms'
# Taxonomy
GENUS = "genus"
SPECIES = "species"

# Location
COUNTRY = "countryOfOriginCode"
SITE = "site"
STATE = "state"
PROVINCE = "province"
MUNICIPALITY = "municipality"
ISLAND = "island"
OTHER = "other"
LATITUDE = "latitude"
LONGITUDE = "longitude"
# NOTE: this rebinds the ALTITUDE defined above; the location key is the
# value in effect for ALLOWED_COLLECTING_SITE_KEYS below.
ALTITUDE = "altitude"
GEOREF_METHOD = "georeferencingMethod"
COORDUNCERTAINTY = "coordUncertainty"
COORD_SPATIAL_REFERENCE = "coordenatesSpatialReference"
LOCATION = "location"

ALLOWED_COLLECTING_SITE_KEYS = [
    COUNTRY,
    STATE,
    PROVINCE,
    ISLAND,
    MUNICIPALITY,
    OTHER,
    SITE,
    LATITUDE,
    LONGITUDE,
    ALTITUDE,
    GEOREF_METHOD,
    COORDUNCERTAINTY,
    COORD_SPATIAL_REFERENCE,
]

MIRRI_FIELDS = [
    {"attribute": "id", "label": "Accession number"},
    {"attribute": "restriction_on_use", "label": "Restrictions on use"},
    {"attribute": "nagoya_protocol",
     "label": "Nagoya protocol restrictions and compliance conditions"},
    {"attribute": ABS_RELATED_FILES, "label": "ABS related files"},
    {"attribute": "mta_files", "label": "MTA file"},
    {"attribute": "other_numbers", "label": "Other culture collection numbers"},
    {"attribute": "is_from_registered_collection",
     "label": "Strain from a registered collection"},
    {"attribute": "risk_group", "label": "Risk Group"},
    {"attribute": "is_potentially_harmful", "label": "Dual use"},
    {"attribute": "is_subject_to_quarantine", "label": "Quarantine in Europe"},
    {"attribute": "taxonomy.organism_type", "label": "Organism type"},
    {"attribute": "taxonomy.taxon_name", "label": "Taxon name"},
    {"attribute": "taxonomy.infrasubspecific_name",
     "label": "Infrasubspecific names"},
    {"attribute": "taxonomy.comments", "label": "Comment on taxonomy"},
    {"attribute": "taxonomy.interspecific_hybrid",
     "label": "Interspecific hybrid"},
    {"attribute": "status", "label": "Status"},
    {"attribute": "history", "label": "History of deposit"},
    {"attribute": "deposit.who", "label": "Depositor"},
    {"attribute": "deposit.date", "label": "Date of deposit"},
    {"attribute": "catalog_inclusion_date",
     "label": "Date of inclusion in the catalogue"},
    {"attribute": "collect.who", "label": "Collected by"},
    {"attribute": "collect.date", "label": "Date of collection"},
    {"attribute": "isolation.who", "label": "Isolated by"},
    {"attribute": "isolation.date", "label": "Date of isolation"},
    {"attribute": "isolation.substrate_host_of_isolation",
     "label": "Substrate/host of isolation"},
    {"attribute": "growth.tested_temp_range",
     "label": "Tested temperature growth range"},
    {"attribute": "growth.recommended_temp",
     "label": "Recommended growth temperature"},
    {"attribute": "growth.recommended_media",
     "label": "Recommended medium for growth"},
    {"attribute": "form_of_supply", "label": "Form of supply"},
    {"attribute": "other_denominations", "label": "Other denomination"},
    {"attribute": "collect.location.coords",
     "label": "Coordinates of geographic origin"},
    {"attribute": "collect.location.altitude",
     "label": "Altitude of geographic origin"},
    {"attribute": "collect.location", "label": "Geographic origin"},
    {"attribute": "collect.habitat", "label": "Isolation habitat"},
    {"attribute": "collect.habitat_ontobiotope",
     "label": "Ontobiotope term for the isolation habitat"},
    {"attribute": "genetics.gmo", "label": "GMO"},
    {"attribute": "genetics.gmo_construction",
     "label": "GMO construction information"},
    {"attribute": "genetics.mutant_info", "label": "Mutant information"},
    {"attribute": "genetics.genotype", "label": "Genotype"},
    {"attribute": "genetics.sexual_state", "label": "Sexual state"},
    {"attribute": "genetics.ploidy", "label": "Ploidy"},
    {"attribute": "genetics.plasmids", "label": "Plasmids"},
    {"attribute": "genetics.plasmids_in_collections",
     "label": "Plasmids collections fields"},
    {"attribute": "publications", "label": "Literature"},
    {"attribute": PLANT_PATHOGENICITY_CODE, "label": "Plant pathogenicity code"},
    {"attribute": "pathogenicity", "label": "Pathogenicity"},
    {"attribute": "enzyme_production", "label": "Enzyme production"},
    {"attribute": "production_of_metabolites",
     "label": "Production of metabolites"},
    {"attribute": "applications", "label": "Applications"},
    {"attribute": "remarks", "label": "Remarks"},
    {"attribute": LITERATURE_LINKED_TO_SEQ_GENOME,
     "label": "Literature linked to the sequence/genome"},
]

ALLOWED_SUBTAXA = ["subspecies", "variety", "convarietas", "group", "forma",
                   'forma.specialis']
ALLOWED_TAXONOMIC_RANKS = ["family", "genus", "species"] + ALLOWED_SUBTAXA

# nagoya
NAGOYA_NO_RESTRICTIONS = "no_known_restrictions_under_the_Nagoya_protocol"
NAGOYA_DOCS_AVAILABLE = "documents_providing_proof_of_legal_access_and_terms_of_use_available_at_the_collection"
NAGOYA_PROBABLY_SCOPE = "strain_probably_in_scope,_please_contact_the_culture_collection"

ALLOWED_NAGOYA_OPTIONS = [NAGOYA_NO_RESTRICTIONS,
                          NAGOYA_DOCS_AVAILABLE, NAGOYA_PROBABLY_SCOPE]

# Use restriction
NO_RESTRICTION = "no_restriction"
ONLY_RESEARCH = "only_research"
COMMERCIAL_USE_WITH_AGREEMENT = "commercial_use_with_agreement"

ALLOWED_RESTRICTION_USE_OPTIONS = [
    NO_RESTRICTION,
    ONLY_RESEARCH,
    COMMERCIAL_USE_WITH_AGREEMENT,
]

ALLOWED_RISK_GROUPS = ["1", "2", "3", "4"]

AGAR = "Agar"
CRYO = "Cryo"
DRY_ICE = "Dry Ice"
LIQUID_CULTURE_MEDIUM = "Liquid Culture Medium"
LYO = "Lyo"
OIL = "Oil"
WATER = "Water"
ALLOWED_FORMS_OF_SUPPLY = [AGAR, CRYO, DRY_ICE,
                           LIQUID_CULTURE_MEDIUM, LYO, OIL, WATER]

DEPOSIT = "deposit"
ISOLATION = "isolation"
COLLECT = "collect"
GROWTH = "growth"
GENETICS = "genetics"
TAXONOMY = "taxonomy"
# Markers
MARKERS = "markers"
MARKER_TYPE = "marker_type"
MARKER_INSDC = "INSDC"
MARKER_SEQ = "marker_seq"
ALLOWED_MARKER_TYPES = [
    {"acronym": "16S rRNA", "marker": "16S rRNA"},
    {"acronym": "ACT", "marker": "Actin"},
    {"acronym": "CaM", "marker": "Calmodulin"},
    {"acronym": "EF-1α", "marker": "elongation factor 1-alpha (EF-1α)"},
    {"acronym": "ITS",
     "marker": "nuclear ribosomal Internal Transcribed Spacer (ITS)"},
    {"acronym": "LSU", "marker": "nuclear ribosomal Large SubUnit (LSU)"},
    {"acronym": "RPB1", "marker": "Ribosomal RNA-coding genes RPB1"},
    {"acronym": "RPB2", "marker": "Ribosomal RNA-coding genes RPB2"},
    {"acronym": "TUBB", "marker": "β-Tubulin"},
]

PUBLICATIONS = "publications"
PUB_ID = "id"
PUB_DOI = "pub_doi"
PUB_PUBMED_ID = "pubmed_id"
PUB_FULL_REFERENCE = "full_reference"
PUB_TITLE = "title"
PUB_AUTHORS = "authors"
PUB_JOURNAL = "journal"
PUB_YEAR = "year"
PUB_VOLUME = "volume"
PUB_ISSUE = "issue"
PUB_FIRST_PAGE = "first_page"
PUB_LAST_PAGE = "last_page"
BOOK_TITLE = "book_title"
BOOK_EDITOR = "book_editor"
BOOK_PUBLISHER = "book_publisher"


PUBLICATION_FIELDS = [
    {"label": "ID", "attribute": PUB_ID},
    {"label": "Full reference", "attribute": PUB_FULL_REFERENCE},
    {"label": "Authors", "attribute": PUB_AUTHORS},
    {"label": "Title", "attribute": PUB_TITLE},
    {"label": "Journal", "attribute": PUB_JOURNAL},
    {"label": "Year", "attribute": PUB_YEAR},
    {"label": "Volume", "attribute": PUB_VOLUME},
    {"label": "Issue", "attribute": PUB_ISSUE},
    {"label": "First page", "attribute": PUB_FIRST_PAGE},
    {"label": "Last page", "attribute": PUB_LAST_PAGE},
    {"label": "Book title", "attribute": BOOK_TITLE},
    {"label": "Editors", "attribute": BOOK_EDITOR},
    {"label": "Publisher", "attribute": BOOK_PUBLISHER},
]


# ploidy
ANEUPLOID = 0
HAPLOID = 1
DIPLOID = 2
TRIPLOID = 3
TETRAPLOID = 4
POLYPLOID = 9

ALLOWED_PLOIDIES = [ANEUPLOID, HAPLOID, DIPLOID, TRIPLOID, TETRAPLOID,
                    POLYPLOID]

SUBTAXAS = {
    "subsp.": "subspecies",
    "var.": "variety",
    "convar.": "convarietas",
    "group.": "group",
    "f.": "forma",
    "f.sp.": "forma.specialis"
}

# Excel sheet names
LOCATIONS = "Geographic origin"  # 'Locations'
GROWTH_MEDIA = "Growth media"
GENOMIC_INFO = "Genomic information"
STRAINS = "Strains"
LITERATURE_SHEET = "Literature"
SEXUAL_STATE_SHEET = "Sexual states"
RESOURCE_TYPES_VALUES = "Resource types values"
FORM_OF_SUPPLY_SHEET = "Forms of supply"
PLOIDY_SHEET = "Ploidy"
ONTOBIOTOPE = "Ontobiotope"
# NOTE: this rebinds the lower-case MARKERS constant above with the sheet name.
MARKERS = "Markers"
48
mirri/utils.py
Normal file
@@ -0,0 +1,48 @@
import pycountry


class FakeCountry:
    def __init__(self, name=None, code3=None):
        self.code3 = code3
        self.name = name


def get_pycountry(value):
    if value == 'INW':
        return FakeCountry(name='International Water', code3='INW')

    country = get_country_from_name(value)
    if country is None:
        country = get_country_from_alpha3(value)
    return country


def get_country_from_name(name):
    country = pycountry.countries.get(name=name)
    try:
        if country is None:
            country = pycountry.countries.get(common_name=name)
        if country is None:
            country = pycountry.countries.get(official_name=name)
        if country is None:
            country = pycountry.historic_countries.get(name=name)
        if country is None:
            country = pycountry.historic_countries.get(common_name=name)
        if country is None:
            country = pycountry.historic_countries.get(official_name=name)
    except (AttributeError, KeyError):
        country = None

    return country


def get_country_from_alpha3(code):
    country = pycountry.countries.get(alpha_3=code)
    try:
        if country is None:
            country = pycountry.historic_countries.get(alpha_3=code)
    except (AttributeError, KeyError):
        country = None

    return country
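
A usage sketch for the country helpers (hedged: the exact return value depends on the installed pycountry data):

country = get_country_from_name('Portugal')
print(country.alpha_3 if country else 'unknown')   # expected: PRT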
0
mirri/validation/__init__.py
Normal file
50
mirri/validation/entity_validators.py
Normal file
@@ -0,0 +1,50 @@
from mirri import rgetattr


def validate_strain(strain, version='20200601'):
    if version == '20200601':
        return _validate_strain_v20200601(strain)
    raise NotImplementedError('Only v20200601 is implemented')


def _validate_strain_v20200601(strain):
    mandatory_attrs = [{'label': 'Accession Number', 'attr': 'id.strain_id'},
                       {'label': 'Nagoya protocol', 'attr': 'nagoya_protocol'},
                       {'label': 'Restriction on use', 'attr': 'restriction_on_use'},
                       {'label': 'Risk group', 'attr': 'risk_group'},
                       {'label': 'Organism type', 'attr': 'taxonomy.organism_type'},
                       {'label': 'Taxon name', 'attr': 'taxonomy.long_name'},
                       {'label': 'Recommended growth temperature', 'attr': 'growth.recommended_temp'},
                       {'label': 'Recommended media', 'attr': 'growth.recommended_media'},
                       {'label': 'Form of supply', 'attr': 'form_of_supply'},
                       {'label': 'Country', 'attr': 'collect.location.country'}]

    errors = []

    for mandatory in mandatory_attrs:
        value = rgetattr(strain, mandatory['attr'])
        if value is None:
            errors.append(f"{mandatory['label']} is a mandatory field")

    if not is_valid_nagoya(strain):
        errors.append('Not compliant with Nagoya protocol requirements')

    return errors


def is_valid_nagoya(strain):
    # Nagoya requirement: strains whose earliest known date (collection,
    # isolation, deposit or catalog inclusion) is 2014 or later must carry a
    # country of origin.
    _date = strain.collect.date
    if _date is None:
        _date = strain.isolation.date
    if _date is None:
        _date = strain.deposit.date
    if _date is None:
        _date = strain.catalog_inclusion_date
    year = None if _date is None else _date._year

    if year is not None and year >= 2014 and strain.collect.location.country is None:
        return False

    return True
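
A usage sketch for the validator (hedged: 'strain' stands for a parsed strain entity such as those yielded by parse_strains above):

for error in validate_strain(strain, version='20200601'):
    print(error)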
3
mirri/validation/error_logging/__init__.py
Normal file
@@ -0,0 +1,3 @@
from .error import Entity, Error
from .error_message import ErrorMessage
from .error_log import ErrorLog
119
mirri/validation/error_logging/error.py
Normal file
@@ -0,0 +1,119 @@
from typing import Optional
from .error_message import ErrorMessage


class Entity():
    """Entity information

    Args:
        acronym: acronym of the entity. Must be a 3-character capitalized string.
    """

    def __init__(self, acronym: str) -> None:
        self.acronym = acronym

    def __str__(self) -> str:
        return f"Entity {self.acronym}: {self.name}"

    @property
    def _acronyms(self) -> list:
        return [
            func
            for func in dir(self)
            if func.isupper() and
            callable(getattr(self, func)) and
            not func.startswith("__")
        ]

    @property
    def _names(self) -> dict:
        return {acr: getattr(self, acr)() for acr in self._acronyms}

    @property
    def name(self) -> str:
        try:
            return self._names[self.acronym]
        except KeyError:
            raise KeyError(f'Unknown acronym {self.acronym}.')

    @property
    def acronym(self) -> str:
        return self._acronym

    @acronym.setter
    def acronym(self, acronym: str) -> None:
        self._acronym = acronym

    def EFS(self) -> str:
        return 'Excel File Structure'

    def GMD(self) -> str:
        return 'Growth Media'

    def GOD(self) -> str:
        return 'Geographic Origin'

    def LID(self) -> str:
        return 'Literature'

    def STD(self) -> str:
        return 'Strains'

    def GID(self) -> str:
        return 'Genomic Information'

    def OTD(self) -> str:
        return 'Ontobiotope'

    def UCT(self) -> str:
        return 'Uncategorized'


class Error():
    """Error information

    Args:
        code (str): Error code; its first three characters name the entity.
        pk (str, optional): The instance's primary key that triggered the error. Defaults to None.
        data (str, optional): Data used for sorting the messages. Defaults to None.
    """

    def __init__(self, code: str, pk: Optional[str] = None, data: Optional[str] = None) -> None:
        self.code = code.upper()
        self.pk = pk
        self.data = data

    def __str__(self):
        return f"Error {self.code}: {self.message}"

    @property
    def code(self) -> str:
        return self._code

    @code.setter
    def code(self, code: str) -> None:
        self._code = code.upper()

    @property
    def pk(self) -> Optional[str]:
        return self._pk

    @pk.setter
    def pk(self, pk: Optional[str] = None) -> None:
        self._pk = pk

    @property
    def data(self) -> Optional[str]:
        return self._data

    @data.setter
    def data(self, data: Optional[str]):
        self._data = data

    @property
    def entity(self) -> Entity:
        return Entity(self.code[:3])

    @property
    def message(self) -> str:
        return ErrorMessage(self.code, self.pk, self.data).message
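
A usage sketch tying Error, Entity and ErrorMessage together (hedged: 'GMD02' is one of the codes defined in ErrorMessage below):

error = Error('GMD02')
print(error.entity.name)   # 'Growth Media'
print(error.message)       # "The 'Acronym' column is empty or has missing values."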
77
mirri/validation/error_logging/error_log.py
Normal file
@@ -0,0 +1,77 @@
from typing import Optional, Union
from datetime import datetime
from .error import Error


class ErrorLog():
    def __init__(self, input_filename: str, cc: Optional[str] = None, date: Optional[Union[str, datetime]] = None, limit: int = 100):
        """
        Logger for Error instances.

        Args:
            input_filename (str): name of the file to be logged
            cc (str, optional): name of the curator. Defaults to None.
            date (str, optional): date (e.g. created, last modified) associated with the file. Useful for versioning. Defaults to None.
            limit (int, optional): limit of errors to print to the report. Defaults to 100.
        """
        self._input_filename = input_filename
        self._cc = cc
        # Go through the property setter so that string dates get parsed.
        self.date = date
        self._errors = {}
        self.limit = limit
        self._counter = 0

    def __str__(self) -> str:
        output = f"""Error Log for file {self._input_filename}\nENTITY | CODE | MESSAGE"""
        for acronym, error_list in self.get_errors().items():
            for error in error_list:
                output += f"\n{acronym:6} | {error.code:6} | {error.message[:100]}"
        return output

    @property
    def input_filename(self) -> str:
        return self._input_filename

    @input_filename.setter
    def input_filename(self, input_filename: str) -> None:
        self._input_filename = input_filename

    @property
    def cc(self) -> Optional[str]:
        return self._cc

    @cc.setter
    def cc(self, cc: Optional[str]) -> None:
        self._cc = cc

    @property
    def date(self) -> Optional[Union[str, datetime]]:
        return self._date

    @date.setter
    def date(self, date: Optional[Union[str, datetime]] = None) -> None:
        if isinstance(date, str):
            self._date = datetime.strptime(date, r'%d-%m-%Y')
        else:
            self._date = date

    def get_errors(self) -> dict:
        """
        Get all errors.

        Returns:
            dict: Error instances grouped by entity acronym.
        """
        return self._errors

    def add_error(self, error: Error) -> None:
        """
        Add an error.

        Args:
            error (Error): Error instance.
        """
        if error.entity.acronym not in self._errors:
            self._errors[error.entity.acronym] = [error]
        else:
            self._errors[error.entity.acronym].append(error)
408
mirri/validation/error_logging/error_message.py
Normal file
@@ -0,0 +1,408 @@
from typing import Optional


class ErrorMessage():
    """Error message

    Args:
        code (str): Error code.
        pk (str, optional): The instance's primary key that triggered the error. Defaults to None.
        value (str, optional): The instance's value that triggered the error. Defaults to None.
    """

    def __init__(self, code: str, pk: Optional[str] = None, value: Optional[str] = None):
        self.code = code.upper()
        self.pk = pk
        self.value = value

    @property
    def _codes(self) -> list:
        return [
            func
            for func in dir(self)
            if func.isupper() and
            callable(getattr(self, func)) and
            not func.startswith("__")
        ]

    @property
    def _messages(self) -> dict:
        return {code: getattr(self, code) for code in self._codes}

    @property
    def message(self) -> str:
        if not self._validate_code():
            raise ValueError(f"{self.code} not found")
        return self._messages[self.code]()

    @property
    def code(self) -> str:
        return self._code

    @code.setter
    def code(self, code: str) -> None:
        self._code = code.upper()

    def _validate_code(self) -> bool:
        return self.code in self._codes

    @property
    def pk(self) -> str:
        return self._pk

    @pk.setter
    def pk(self, pk: str) -> None:
        self._pk = pk

    @property
    def value(self) -> str:
        return self._value

    @value.setter
    def value(self, value: str) -> None:
        self._value = value

    """
    Excel File Structure Error Codes
    """

    def EXL00(self):
        return f"The provided file '{self.pk}' is not an excel (xlsx) file"

    def EFS01(self):
        return "The 'Growth media' sheet is missing. Please check the provided excel template."

    def EFS02(self):
        return "The 'Geographic origin' sheet is missing. Please check the provided excel template."

    def EFS03(self):
        return "The 'Literature' sheet is missing. Please check the provided excel template."

    def EFS04(self):
        return "The 'Sexual state' sheet is missing. Please check the provided excel template."

    def EFS05(self):
        return "The 'Strains' sheet is missing. Please check the provided excel template."

    def EFS06(self):
        return "The 'Ontobiotope' sheet is missing. Please check the provided excel template."

    def EFS07(self):
        return "The 'Markers' sheet is missing. Please check the provided excel template."

    def EFS08(self):
        return "The 'Genomic information' sheet is missing. Please check the provided excel template."

    """
    Growth Media Error Codes
    """

    def GMD01(self):
        return "The 'Acronym' column is a mandatory field in the Growth Media sheet."

    def GMD02(self):
        return "The 'Acronym' column is empty or has missing values."

    def GMD03(self):
        return "The 'Description' column is a mandatory field in the Growth Media sheet. The column can not be empty."

    def GMD04(self):
        return f"The 'Description' for growth media with Acronym {self.pk} is missing."

    """
    Geographic Origin Error Codes
    """

    def GOD01(self):
        return "The 'ID' column is a mandatory field in the Geographic Origin sheet."

    def GOD02(self):
        return "The 'ID' column is empty or has missing values."

    def GOD03(self):
        return "The 'Country' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."

    def GOD04(self):
        return f"The 'Country' for geographic origin with ID {self.pk} is missing."

    def GOD05(self):
        return f"The 'Country' for geographic origin with ID {self.pk} is incorrect."

    def GOD06(self):
        return "The 'Locality' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."

    def GOD07(self):
        return f"The 'Locality' for geographic origin with ID {self.pk} is missing."

    """
    Literature Error Codes
    """

    def LID01(self):
        return "The 'ID' column is a mandatory field in the Literature sheet."

    def LID02(self):
        return "The 'ID' column is empty or has missing values."

    def LID03(self):
        return "The 'Full reference' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID04(self):
        return f"The 'Full reference' for literature with ID {self.pk} is missing."

    def LID05(self):
        return "The 'Authors' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID06(self):
        return f"The 'Authors' for literature with ID {self.pk} is missing."

    def LID07(self):
        return "The 'Title' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID08(self):
        return f"The 'Title' for literature with ID {self.pk} is missing."

    def LID09(self):
        return "The 'Journal' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID10(self):
        return f"The 'Journal' for literature with ID {self.pk} is missing."

    def LID11(self):
        return "The 'Year' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID12(self):
        return f"The 'Year' for literature with ID {self.pk} is missing."

    def LID13(self):
        return "The 'Volume' column is a mandatory field in the Literature sheet. The column can not be empty."

    def LID14(self):
        return f"The 'Volume' for literature with ID {self.pk} is missing."

    def LID15(self):
        return "The 'First page' column is a mandatory field. The column can not be empty."

    def LID16(self):
        return f"The 'First page' for literature with ID {self.pk} is missing."

    def LID17(self):
        msg = 'If journal: Title, Authors, Journal, Year and First page are required. '
        msg += 'If book: Book title, Authors, Year, Editors and Publisher are required.'
        return msg

    """
    Strains Error Codes
    """

    def STD01(self):
        return "The 'Accession number' column is a mandatory field in the Strains sheet."

    def STD02(self):
        return "The 'Accession number' column is empty or has missing values."

    def STD03(self):
|
||||||
|
return f"The 'Accesion number' must be unique. The '{self.value}' is repeated."
|
||||||
|
|
||||||
|
def STD04(self):
|
||||||
|
return (f"The 'Accession number' {self.pk} is not according to the specification."
|
||||||
|
" The value must be of the format '<Sequence of characters> <sequence of characters>'.")
|
||||||
|
|
||||||
|
def STD05(self):
|
||||||
|
return f"The 'Restriction on use' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD06(self):
|
||||||
|
return f"The 'Restriction on use' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD07(self):
|
||||||
|
return (f"The 'Restriction on use' for strain with Accession Number {self.pk} is not according to the specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
|
def STD08(self):
|
||||||
|
return f"The 'Nagoya protocol restrictions and compliance conditions' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD09(self):
|
||||||
|
return f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD10(self):
|
||||||
|
return (f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is not according to the specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
|
def STD11(self):
|
||||||
|
return (f"The 'Strain from a registered collection' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
|
def STD12(self):
|
||||||
|
return "The 'Risk group' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD13(self):
|
||||||
|
return f"The 'Risk group' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD14(self):
|
||||||
|
return (f"The 'Risk group' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2, 3, 4.")
|
||||||
|
|
||||||
|
def STD15(self):
|
||||||
|
return (f"The 'Dual use' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
|
def STD16(self):
|
||||||
|
return (f"The “Quarantine in europe” for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
|
def STD17(self):
|
||||||
|
return f"The 'Organism type' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD18(self):
|
||||||
|
return f"The 'Organism type' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD19(self):
|
||||||
|
return (f"The 'Organism type' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 'Algae', 'Archaea', 'Bacteria', 'Cyanobacteria', "
|
||||||
|
"'Filamentous Fungi', 'Phage', 'Plasmid', 'Virus', 'Yeast', 1, 2, 3, 4, 5, 6, 7, 8, 9.")
|
||||||
|
|
||||||
|
def STD20(self):
|
||||||
|
return f"The 'Taxon name' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD21(self):
|
||||||
|
return f"The 'Taxon name' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD22(self):
|
||||||
|
return f"The 'Taxon name' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
|
||||||
|
def STD23(self):
|
||||||
|
return (f"The 'Interspecific hybrid' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
|
def STD24(self):
|
||||||
|
return f"The 'History of deposit' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
|
||||||
|
def STD25(self):
|
||||||
|
return (f"The 'Date of deposit' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
|
def STD26(self):
|
||||||
|
return (f"The 'Date of inclusion in the catalogue' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
|
def STD27(self):
|
||||||
|
return (f"The 'Date of collection' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
|
def STD28(self):
|
||||||
|
return (f"The 'Date of isolation' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
|
def STD29(self):
|
||||||
|
return (f"The 'Tested temperature growth range' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" It must have two decimal numbers separated by ','")
|
||||||
|
|
||||||
|
def STD30(self):
|
||||||
|
return f"The 'Recommended growth temperature' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD31(self):
|
||||||
|
return f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD32(self):
|
||||||
|
return (f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
" It must have two decimal numbers separated by ','.")
|
||||||
|
|
||||||
|
def STD33(self):
|
||||||
|
return f"The 'Recommended medium for growth' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD34(self):
|
||||||
|
return f"The 'Recommended medium for growth' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD35(self):
|
||||||
|
return f"The value of 'Recommended medium for growth' for strain with Accession Number {self.pk} is not in the Growth Media Sheet."
|
||||||
|
|
||||||
|
def STD36(self):
|
||||||
|
return f"The 'Forms of supply' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD37(self):
|
||||||
|
return f"The 'Forms of supply' for strain with Accession Number {self.pk} is missing."
|
||||||
|
|
||||||
|
def STD38(self):
|
||||||
|
return f"The value of 'Forms of supply' for strain with Accession Number {self.pk} is not in the Forms of Supply Sheet."
|
||||||
|
|
||||||
|
def STD39(self):
|
||||||
|
return (f"The 'Coordinates of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
"The allowed formats are two or three decimal numbers separated by ','. Moreover, the first number must be"
|
||||||
|
"between [-90, 90], the second between [-180, 180], and the third, if provided, can assume any value.")
|
||||||
|
|
||||||
|
def STD40(self):
|
||||||
|
return (f"The 'Altitude of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
|
||||||
|
"The allowed formats are one decimal number between [-200, 8000].")
|
||||||
|
|
||||||
|
def STD41(self):
|
||||||
|
return f"The value of 'Ontobiotope term for the isolation habitat' for strain with Accession Number {self.pk} is not in the Ontobiotope Sheet."
|
||||||
|
|
||||||
|
def STD42(self):
|
||||||
|
return (f"The 'GMO' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2")
|
||||||
|
|
||||||
|
def STD43(self):
|
||||||
|
return (f"The 'Sexual State' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 'Mata', 'Matalpha', 'Mata/Matalpha', "
|
||||||
|
"'Matb', 'Mata/Matb', 'MTLa', 'MTLalpha', 'MTLa/MTLalpha', 'MAT1-1', 'MAT1-2', 'MAT1', 'MAT2', 'MT+', 'MT-'")
|
||||||
|
|
||||||
|
def STD44(self):
|
||||||
|
return (f"The 'Ploidy' for strain with Accession Number {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 0, 1, 2, 3, 4, 9")
|
||||||
|
|
||||||
|
def STD45(self):
|
||||||
|
msg = f"At least one of the values '{self.value}' of the literature field for strain {self.pk} are not in the literature sheet. "
|
||||||
|
msg += "If the those values are Pubmed ids or DOIs, please ignore this messsage"
|
||||||
|
return msg
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Genomic Information Error Codes
|
||||||
|
"""
|
||||||
|
|
||||||
|
def GID01(self):
|
||||||
|
return f"The 'Strain Acession Number' (Strain AN) column is a mandatory field in the Genomic Information Sheet."
|
||||||
|
|
||||||
|
def GID02(self):
|
||||||
|
return f"The 'Strain Acession Number' (Strain AN) column is empty or has missing values."
|
||||||
|
|
||||||
|
def GID03(self):
|
||||||
|
return f"The value of 'Strain Acession Number' (Strain AN) {self.value} is not in the Strains sheet."
|
||||||
|
|
||||||
|
def GID04(self):
|
||||||
|
return f"The 'Marker' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def GID05(self):
|
||||||
|
return f"The 'Marker' for genomic information with Strain AN {self.pk} is missing."
|
||||||
|
|
||||||
|
def GID06(self):
|
||||||
|
return f"The 'Marker' for genomic information with Strain AN {self.pk} is incorrect."
|
||||||
|
|
||||||
|
def GID07(self):
|
||||||
|
return f"The 'INSDC AN' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def GID08(self):
|
||||||
|
return f"The 'INSDC AN' for genomic information with Strain AN {self.pk} is missing."
|
||||||
|
|
||||||
|
def GID09(self):
|
||||||
|
return f"The 'INSDC AN' for genomic information with Strain AN {self.pk} is incorrect."
|
||||||
|
|
||||||
|
def GID10(self):
|
||||||
|
return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect."
|
||||||
|
" It must be a sequence of 'G', 'T', 'A', 'C' characteres of any length and without white spaces.")
|
||||||
|
|
||||||
|
"""
|
||||||
|
Ontobiotope Error Codes
|
||||||
|
"""
|
||||||
|
|
||||||
|
def OTD01(self):
|
||||||
|
return "The 'ID' columns is a mandatory field in the Ontobiotope Sheet."
|
||||||
|
|
||||||
|
def OTD02(self):
|
||||||
|
return "The 'ID' columns is empty or has missing values."
|
||||||
|
|
||||||
|
def OTD03(self):
|
||||||
|
return "The 'Name' columns is a mandatory field in the Ontobiotope Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def OTD04(self):
|
||||||
|
return f"The 'Name' for ontobiotope with ID {self.pk} is missing."
|
||||||
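
# A usage sketch of the error-code machinery above (illustrative only; it
# assumes the constructor signature Error(code, pk=None, value=None) seen in
# this class' __init__):
#     error = Error('GMD04', pk='MA2')
#     error.message  # "The 'Description' for growth media with Acronym MA2 is missing."
# The `message` property dispatches to the upper-case method named after the
# code, so adding a new error is just a matter of adding such a method.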
483
mirri/validation/excel_validator.py
Normal file
@@ -0,0 +1,483 @@
import re
from pathlib import Path
from io import BytesIO
from zipfile import BadZipfile
from datetime import datetime
from calendar import monthrange

from openpyxl import load_workbook

from mirri.io.parsers.excel import workbook_sheet_reader, get_all_cell_data_from_sheet
from mirri.validation.error_logging import ErrorLog, Error
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
                                   ERROR_CODE, FIELD, MANDATORY, MATCH,
                                   MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON,
                                   TYPE, UNIQUE, VALIDATION, VALUES, BIBLIO)
from mirri.settings import LOCATIONS, SUBTAXAS
from mirri.validation.validation_conf_20200601 import MIRRI_20200601_VALLIDATION_CONF


def validate_mirri_excel(fhand, version="20200601"):
    if version == "20200601":
        configuration = MIRRI_20200601_VALLIDATION_CONF
    else:
        raise NotImplementedError("Only version 20200601 is implemented")

    return validate_excel(fhand, configuration)


def validate_excel(fhand, configuration):
    validation_conf = configuration['sheet_schema']
    cross_ref_conf = configuration['cross_ref_conf']
    in_memory_sheet_conf = configuration['keep_sheets_in_memory']
    excel_name = Path(fhand.name).stem
    error_log = ErrorLog(excel_name)

    try:
        workbook = load_workbook(filename=BytesIO(fhand.read()),
                                 read_only=True, data_only=True)
    except (BadZipfile, IOError):
        error = Error('EXL00', fhand.name, fhand.name)
        error_log.add_error(error)
        return error_log

    # excel structure errors
    structure_errors = list(validate_excel_structure(workbook, validation_conf))
    if structure_errors:
        for error in structure_errors:
            error = Error(error[ERROR_CODE], pk=error['id'],
                          data=error['value'])
            error_log.add_error(error)

        return error_log

    crossrefs = get_all_crossrefs(workbook, cross_ref_conf)
    in_memory_sheets = get_all_in_memory_sheet(workbook, in_memory_sheet_conf)
    content_errors = validate_content(workbook, validation_conf,
                                      crossrefs, in_memory_sheets)

    for error in content_errors:
        # if error[ERROR_CODE] == 'STD43':
        #     continue
        error = Error(error[ERROR_CODE], pk=error['id'], data=error['value'])

        error_log.add_error(error)
    return error_log
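
# A minimal usage sketch for the entry point above ('strains.xlsx' is a
# hypothetical file name; ErrorLog's interface is assumed from its use in
# is_valid_file() further down, where get_errors() behaves like a mapping of
# errors grouped by type):
#     with open('strains.xlsx', 'rb') as fhand:
#         error_log = validate_mirri_excel(fhand)
#     for error_type, errors in error_log.get_errors().items():
#         ...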
def validate_excel_structure(workbook, validation_conf):
    for sheet_name, sheet_conf in validation_conf.items():
        mandatory = sheet_conf.get(VALIDATION, {}).get(TYPE, None)
        mandatory = mandatory == MANDATORY

        error_code = sheet_conf.get(VALIDATION, {}).get(ERROR_CODE, False)
        try:
            sheet = workbook[sheet_name]
        except KeyError:
            sheet = None

        if sheet is None:
            if mandatory:
                yield {'id': None, 'sheet': sheet_name, 'field': None,
                       'error_code': error_code, 'value': None}
            continue

        headers = _get_sheet_headers(sheet)
        for column in sheet_conf.get(COLUMNS):
            field = column[FIELD]
            for step in column.get(VALIDATION, []):
                if step[TYPE] == MANDATORY and field not in headers:
                    yield {'id': None, 'sheet': sheet_name, 'field': field,
                           'error_code': step[ERROR_CODE], 'value': None}


def _get_sheet_headers(sheet):
    first_row = next(sheet.iter_rows(min_row=1, max_row=1))
    return [c.value for c in first_row]


def _get_values_from_columns(workbook, sheet_name, columns):
    indexed_values = {}
    for row in workbook_sheet_reader(workbook, sheet_name):
        for col in columns:
            indexed_values[str(row.get(col))] = ""

    return indexed_values


def get_all_crossrefs(workbook, cross_refs_names):
    crossrefs = {}
    for ref_name, columns in cross_refs_names.items():
        if columns:
            crossrefs[ref_name] = _get_values_from_columns(workbook, ref_name,
                                                           columns)
        else:
            try:
                crossrefs[ref_name] = get_all_cell_data_from_sheet(workbook, ref_name)
            except ValueError as error:
                if 'sheet is missing' in str(error):
                    crossrefs[ref_name] = []
                else:
                    raise

    return crossrefs


def get_all_in_memory_sheet(workbook, in_memory_sheet_conf):
    in_memory_sheets = {}
    for sheet_conf in in_memory_sheet_conf:
        sheet_name = sheet_conf['sheet_name']
        indexed_by = sheet_conf['indexed_by']
        rows = workbook_sheet_reader(workbook, sheet_name)
        indexed_rows = {row[indexed_by]: row for row in rows}
        in_memory_sheets[sheet_name] = indexed_rows

    return in_memory_sheets


def validate_content(workbook, validation_conf, crossrefs, in_memory_sheets):
    for sheet_name in validation_conf.keys():
        sheet_conf = validation_conf[sheet_name]
        sheet_id_column = sheet_conf['id_field']
        shown_values = {}
        row_validation_steps = sheet_conf.get(ROW_VALIDATION, None)
        for row in workbook_sheet_reader(workbook, sheet_name):
            id_ = row.get(sheet_id_column, None)
            if id_ is None:
                error_code = _get_missing_row_id_error(sheet_id_column,
                                                       sheet_conf)
                yield {'id': id_, 'sheet': sheet_name,
                       'field': sheet_id_column,
                       'error_code': error_code, 'value': None}
                continue
            do_have_cell_error = False
            for column in sheet_conf[COLUMNS]:
                label = column[FIELD]
                validation_steps = column.get(VALIDATION, None)
                value = row.get(label, None)
                if validation_steps:
                    error_code = validate_cell(value, validation_steps,
                                               crossrefs, shown_values, label)
                    if error_code is not None:
                        do_have_cell_error = True
                        yield {'id': id_, 'sheet': sheet_name, 'field': label,
                               'error_code': error_code, 'value': value}

            if not do_have_cell_error and row_validation_steps:
                error_code = validate_row(
                    row, row_validation_steps, in_memory_sheets)
                if error_code is not None:
                    yield {'id': id_, 'sheet': sheet_name, 'field': 'row',
                           'error_code': error_code, 'value': 'row'}
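
# Shape note (derived from the yields above): every detected problem is a
# plain dict with the keys 'id', 'sheet', 'field', 'error_code' and 'value',
# which validate_excel() then wraps into Error instances.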
def _get_missing_row_id_error(sheet_id_column, sheet_conf):
    error_code = None
    for column in sheet_conf[COLUMNS]:
        if column[FIELD] == sheet_id_column:
            error_code = [step[ERROR_CODE]
                          for step in column[VALIDATION] if step[TYPE] == MISSING][0]
    return error_code


def validate_row(row, validation_steps, in_memory_sheets):
    for validation_step in validation_steps:
        kind = validation_step[TYPE]
        error_code = validation_step[ERROR_CODE]
        if kind == NAGOYA:
            if not is_valid_nagoya(row, in_memory_sheets):
                return error_code
        elif kind == BIBLIO:
            if not is_valid_pub(row):
                return error_code
        else:
            msg = f'{kind} is not a recognized row validation type method'
            raise NotImplementedError(msg)


def validate_cell(value, validation_steps, crossrefs, shown_values, label):
    for step_conf in validation_steps:
        if step_conf[TYPE] == MANDATORY:
            continue
        step_conf['crossrefs_pointer'] = crossrefs
        step_conf['shown_values'] = shown_values
        step_conf['label'] = label
        error_code = validate_value(value, step_conf)

        if error_code is not None:
            return error_code


def is_valid_pub(row):
    title = row.get('Title', None)
    full_reference = row.get('Full reference', None)
    authors = row.get('Authors', None)
    journal = row.get('Journal', None)
    year = row.get('Year', None)
    volume = row.get('Volume', None)
    first_page = row.get('First page', None)
    book_title = row.get('Book title', None)
    editors = row.get('Editors', None)
    publisher = row.get('Publisher', None)

    # a filled-in free-text reference is always enough
    if full_reference:
        return True
    is_journal = bool(title)

    # journal articles need authors, journal, year, volume and first page
    if (is_journal and (not authors or not journal or not year or
                        not volume or not first_page)):
        return False
    # books need authors, year, editors, publisher and book title
    if (not is_journal and (not authors or not year or
                            not editors or not publisher or not book_title)):
        return False

    return True
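
# Examples for the bibliography rule above: a row with only 'Full reference'
# filled in passes; a row with a 'Title' is treated as a journal article and
# also needs Authors, Journal, Year, Volume and First page; a row without a
# title is treated as a book and needs Authors, Year, Editors, Publisher and
# Book title.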
def is_valid_nagoya(row, in_memory_sheets):  # sourcery skip: return-identity
    location_index = row.get('Geographic origin', None)
    if location_index is None:
        country = None
    else:
        geo_origin = in_memory_sheets[LOCATIONS].get(location_index, {})
        country = geo_origin.get('Country', None)

    _date = row.get("Date of collection", None)
    if _date is None:
        _date = row.get("Date of isolation", None)
    if _date is None:
        _date = row.get("Date of deposit", None)
    if _date is None:
        _date = row.get("Date of inclusion in the catalogue", None)
    if _date is not None:
        year = _date.year if isinstance(_date, datetime) else int(str(_date)[:4])
    else:
        year = None

    if year is not None and year >= 2014 and country is None:
        return False

    return True
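
# In short: a strain whose first available date (collection, then isolation,
# deposit or catalogue inclusion) is 2014 or later, the year the Nagoya
# Protocol entered into force, must have a country in its geographic origin;
# older or undated records pass.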
def is_valid_regex(value, validation_conf):
    if value is None:
        return True
    value = str(value)
    regexp = validation_conf[MATCH]
    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, None)

    values = [v.strip() for v in value.split(
        separator)] if multiple else [value]

    for value in values:
        matches_regexp = re.fullmatch(regexp, value)
        if not matches_regexp:
            return False
    return True


def is_valid_crossrefs(value, validation_conf):
    crossref_name = validation_conf[CROSSREF_NAME]
    crossrefs = validation_conf['crossrefs_pointer']
    choices = crossrefs[crossref_name]
    if value is None or not choices:
        return True
    value = str(value)

    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, None)
    if multiple:
        values = [v.strip() for v in value.split(separator)]
    else:
        values = [value.strip()]

    return all(value in choices for value in values)


def is_valid_choices(value, validation_conf):
    if value is None:
        return True
    choices = validation_conf[VALUES]
    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, None)

    if multiple:
        values = [v.strip() for v in str(value).split(separator)]
    else:
        values = [str(value).strip()]

    return all(value in choices for value in values)


def is_valid_date(value, validation_conf):
    if value is None:
        return True
    if isinstance(value, datetime):
        year = value.year
        month = value.month
        day = value.day
    elif isinstance(value, int):
        year = value
        month = None
        day = None
    elif isinstance(value, str):
        value = value.replace('-', '')
        value = value.replace('/', '')
        month = None
        day = None
        try:
            year = int(value[:4])
            if len(value) >= 6:
                month = int(value[4:6])
            if len(value) >= 8:
                day = int(value[6:8])

        except (IndexError, TypeError, ValueError):
            return False
    else:
        return False

    if year < 1700 or year > datetime.now().year:
        return False
    if month is not None:
        if month < 1 or month > 12:
            return False
        if day is not None and (day < 1 or day > monthrange(year, month)[1]):
            return False
    return True
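
# Examples for the date rule above: '2001-03-21', '20010321', '200103' and
# the integer 2001 are all accepted; '20011341' fails (month 13), as do years
# before 1700 or later than the current year.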
def is_valid_coords(value, validation_conf=None):
    # sourcery skip: return-identity
    if value is None:
        return True
    try:
        items = [i.strip() for i in value.split(";")]
        latitude = float(items[0])
        longitude = float(items[1])
        if len(items) > 2:
            # the optional third value (precision) only has to parse as a number
            float(items[2])
        if latitude < -90 or latitude > 90:
            return False
        if longitude < -180 or longitude > 180:
            return False
        return True
    except (AttributeError, IndexError, ValueError):
        return False
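
# Example coordinate values for the check above: '40.4168; -3.7038' and
# '40.4168; -3.7038; 10' pass, while '100; 0' fails the latitude range and a
# single number fails because a longitude is required.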
def is_valid_missing(value, validation_conf=None):
    return value is not None


def is_valid_number(value, validation_conf):
    if value is None:
        return True
    try:
        value = float(value)
    except (TypeError, ValueError):
        return False

    _max = validation_conf.get('max', None)
    _min = validation_conf.get('min', None)
    if (_max is not None and value > _max) or (_min is not None and value < _min):
        return False

    return True


def is_valid_taxon(value, validation_conf=None):
    if value is None:
        return True
    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, ';')

    value = value.split(separator) if multiple else [value]
    for taxon in value:
        taxon = taxon.strip()
        if not _is_valid_taxon(taxon):
            return False
    return True


def _is_valid_taxon(value):
    value = value.strip()
    if not value:
        return True

    items = re.split(r" +", value)
    genus = items[0]

    if len(items) > 1:
        species = items[1]
        if species in ("sp", "spp", ".sp", "sp."):
            return False

    if len(items) > 2:
        for index in range(0, len(items[2:]), 2):
            rank = SUBTAXAS.get(items[index + 2], None)
            if rank is None:
                return False

    return True
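
# Examples for the taxon rule above: 'Aspergillus niger' passes, while
# 'Aspergillus sp.' is rejected (placeholder epithets are not accepted) and
# any subtaxa pair whose rank marker is not a SUBTAXAS key also fails.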
def is_valid_unique(value, validation_conf):
    label = validation_conf['label']
    shown_values = validation_conf['shown_values']
    if label not in shown_values:
        shown_values[label] = {}

    already_in_file = shown_values[label]
    if value in already_in_file:
        return False

    # remember the value so a later occurrence in the same file fails the check
    shown_values[label][value] = None

    return True


def is_valid_file(path):
    try:
        with path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)
            if "EXL" in error_log.get_errors():
                return False
    except Exception:
        return False

    return True


VALIDATION_FUNCTIONS = {
    MISSING: is_valid_missing,
    REGEXP: is_valid_regex,
    CHOICES: is_valid_choices,
    CROSSREF: is_valid_crossrefs,
    DATE: is_valid_date,
    COORDINATES: is_valid_coords,
    NUMBER: is_valid_number,
    TAXON: is_valid_taxon,
    UNIQUE: is_valid_unique}


def validate_value(value, step_conf):
    kind = step_conf[TYPE]
    try:
        is_value_valid = VALIDATION_FUNCTIONS[kind]
    except KeyError:
        msg = f'This validation type {kind} is not implemented'
        raise NotImplementedError(msg)

    error_code = step_conf[ERROR_CODE]

    if not is_value_valid(value, step_conf):
        return error_code
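
# Dispatch sketch: validate_value() picks the checker through the step's TYPE
# tag, so a configuration step such as
#     {TYPE: NUMBER, 'max': 8000, 'min': -200, ERROR_CODE: 'STD40'}
# routes to is_valid_number() and yields 'STD40' when the check fails, or
# None when the value is acceptable.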
24
mirri/validation/tags.py
Normal file
@@ -0,0 +1,24 @@
MANDATORY = "mandatory"
REGEXP = "regexp"
CHOICES = "choices"
CROSSREF = 'crossref'
CROSSREF_NAME = 'crossref_name'
MISSING = "missing"
VALIDATION = 'validation'
ERROR_CODE = 'error_code'
FIELD = 'field'
MULTIPLE = 'multiple'
TYPE = 'type'
COLUMNS = 'columns'
SOURCE = "sources"
SEPARATOR = "separator"
MATCH = 'match'
VALUES = 'values'
DATE = 'date'
COORDINATES = 'coord'
NUMBER = 'number'
TAXON = 'taxon'
UNIQUE = 'unique'
ROW_VALIDATION = 'row_validation'
NAGOYA = 'nagoya'
BIBLIO = 'bibliography'
548
mirri/validation/validation_conf_20200601.py
Normal file
@@ -0,0 +1,548 @@
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
                                   ERROR_CODE, FIELD, MANDATORY, MATCH,
                                   MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
                                   UNIQUE,
                                   VALIDATION, VALUES, BIBLIO)
from mirri.settings import (GEOGRAPHIC_ORIGIN, ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
                            STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET)
# MARKERS,
# SEXUAL_STATE_SHEET,
# RESOURCE_TYPES_VALUES,
# FORM_OF_SUPPLY_SHEET,
# PLOIDY_SHEET)


STRAIN_FIELDS = [
    {
        FIELD: "Accession number",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: 'STD01'},
            {TYPE: UNIQUE, ERROR_CODE: 'STD03'},
            {TYPE: MISSING, ERROR_CODE: "STD02"},
            {TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD04"}
        ]
    },
    {
        FIELD: "Restrictions on use",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD05"},
            {TYPE: MISSING, ERROR_CODE: "STD06"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3"],
             MULTIPLE: False, ERROR_CODE: "STD07"}
        ]
    },
    {
        FIELD: "Nagoya protocol restrictions and compliance conditions",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD08"},
            {TYPE: MISSING, ERROR_CODE: "STD09"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3"],
             MULTIPLE: False, ERROR_CODE: "STD10"}
        ]
    },
    {
        FIELD: "ABS related files",
        VALIDATION: [],
    },
    {
        FIELD: "MTA file",
        VALIDATION: [],
    },
    {
        FIELD: "Other culture collection numbers",
        # VALIDATION: [
        #     {TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD07",
        #      MULTIPLE: True, SEPARATOR: ";"}
        # ]
    },
    {
        FIELD: "Strain from a registered collection",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD11"}
        ]
    },
    {
        FIELD: "Risk Group",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD12"},
            {TYPE: MISSING, ERROR_CODE: "STD13"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"],
             MULTIPLE: False, ERROR_CODE: "STD14"}
        ]
    },
    {
        FIELD: "Dual use",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD15"}
        ]
    },
    {
        FIELD: "Quarantine in Europe",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD16"}
        ]
    },
    {
        FIELD: "Organism type",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD17"},
            {TYPE: MISSING, ERROR_CODE: "STD18"},
            {TYPE: CHOICES, VALUES: ["Algae", "Archaea", "Bacteria",
                                     "Cyanobacteria", "Filamentous Fungi",
                                     "Phage", "Plasmid", "Virus", "Yeast",
                                     "1", "2", "3", "4", "5", "6", "7", "8", "9"],
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD19"}
        ]
    },
    {
        FIELD: "Taxon name",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD20"},
            {TYPE: MISSING, ERROR_CODE: "STD21"},
            {TYPE: TAXON, ERROR_CODE: "STD22", MULTIPLE: True,
             SEPARATOR: ';'}
        ]
    },
    {
        FIELD: "Infrasubspecific names",
    },
    {
        FIELD: "Comment on taxonomy",
    },
    {
        FIELD: "Interspecific hybrid",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD23"}
        ]
    },
    {
        FIELD: "Status",
    },
    {
        FIELD: "History of deposit",
        VALIDATION: [
            # {TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD24",  # modify the regex
            #  MULTIPLE: True, SEPARATOR: ";"}
        ]
    },
    {
        FIELD: "Depositor"
    },
    {
        FIELD: "Date of deposit",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD25"},
        ]
    },
    {
        FIELD: "Date of inclusion in the catalogue",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD26"},
        ]
    },
    {
        FIELD: "Collected by",
    },
    {
        FIELD: "Date of collection",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD27"},
        ]
    },
    {
        FIELD: "Isolated by",
    },
    {
        FIELD: "Date of isolation",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD28"},
        ]
    },
    {
        FIELD: "Substrate/host of isolation",
    },
    {
        FIELD: "Tested temperature growth range",
        VALIDATION: [
            {TYPE: REGEXP, MATCH: r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
             ERROR_CODE: "STD29", MULTIPLE: True, SEPARATOR: ";"}
        ]
    },
    {
        FIELD: "Recommended growth temperature",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD30"},
            {TYPE: MISSING, ERROR_CODE: "STD31"},
            {TYPE: REGEXP, MATCH: r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
             ERROR_CODE: "STD32",
             MULTIPLE: True, SEPARATOR: ";"}
        ]
    },
    {
        FIELD: "Recommended medium for growth",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD33"},
            {TYPE: MISSING, ERROR_CODE: "STD34"},
            {TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
             MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"}
        ]
    },
    {
        FIELD: "Form of supply",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD36"},
            {TYPE: MISSING, ERROR_CODE: "STD37"},
            {TYPE: CHOICES, VALUES: ['Agar', 'Cryo', 'Dry Ice', 'Liquid Culture Medium',
                                     'Lyo', 'Oil', 'Water'],
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD38"}
        ]
    },
    {
        FIELD: "Other denomination",
    },
    {
        FIELD: "Coordinates of geographic origin",
        VALIDATION: [
            {TYPE: COORDINATES, ERROR_CODE: "STD39"},
        ]
    },
    {
        FIELD: "Altitude of geographic origin",
        VALIDATION: [
            {TYPE: NUMBER, 'max': 8000, 'min': -200, ERROR_CODE: "STD40"},
        ]
    },
    {
        # value can be in the cell or in another sheet. Don't configure this
        FIELD: "Geographic origin",
    },
    {
        FIELD: "Isolation habitat",
    },
    {
        FIELD: "Ontobiotope term for the isolation habitat",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: "Ontobiotope",
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD41"}
        ]
    },
    {
        FIELD: "GMO",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"],
             ERROR_CODE: "STD42"}
        ]
    },
    {
        FIELD: "GMO construction information",
    },
    {
        FIELD: "Mutant information",
    },
    {
        FIELD: "Genotype",
    },
    {
        FIELD: "Sexual state",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: SEXUAL_STATE_SHEET,
             ERROR_CODE: "STD43"}
            # {TYPE: CHOICES, VALUES: ["Mata", "Matalpha", "Mata/Matalpha",
            #                          "Matb", "Mata/Matb", "MTLa", "MTLalpha", "MTLa/MTLalpha",
            #                          "MAT1-1", "MAT1-2", "MAT1", "MAT2", "MT+", "MT-"],
            #  ERROR_CODE: "STD43"}
        ]
    },
    {
        FIELD: "Ploidy",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["0", "1", "2", "3", "4", "9"],
             ERROR_CODE: "STD44"}
        ]
    },
    {
        FIELD: "Plasmids",
    },
    {
        FIELD: "Plasmids collections fields",
    },
    {
        # value can be in the cell or in another sheet. Don't configure this
        FIELD: "Literature",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: LITERATURE_SHEET,
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD45"}
        ]
    },
    {
        FIELD: "Plant pathogenicity code",
    },
    {
        FIELD: "Pathogenicity",
    },
    {
        FIELD: "Enzyme production",
    },
    {
        FIELD: "Production of metabolites",
    },
    {
        FIELD: "Applications",
    },
    {
        FIELD: "Remarks"
    },
    {
        FIELD: "Literature linked to the sequence/genome",
    },
]

SHEETS_SCHEMA = {
    LOCATIONS: {
        "acronym": "GOD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS02"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD01"},
                    {TYPE: MISSING, ERROR_CODE: "GOD02"},
                ]
            },
            {
                FIELD: "Country",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD03"},
                    {TYPE: MISSING, ERROR_CODE: "GOD04"}
                ]
            },
            {
                FIELD: "Region",
                VALIDATION: []
            },
            {
                FIELD: "City",
                VALIDATION: []
            },
            {
                FIELD: "Locality",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD06"},
                    {TYPE: MISSING, ERROR_CODE: "GOD07"}
                ]
            }
        ],
    },
    GROWTH_MEDIA: {
        "acronym": "GMD",
        "id_field": "Acronym",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS01"},
        COLUMNS: [
            {
                FIELD: "Acronym",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD01"},
                    {TYPE: MISSING, ERROR_CODE: "GMD02"}
                ]
            },
            {
                FIELD: "Description",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD03"},
                    {TYPE: MISSING, ERROR_CODE: "GMD04"}
                ]
            },
            {
                FIELD: "Full description",
                VALIDATION: []
            },
        ],
    },
    GENOMIC_INFO: {
        "acronym": "GID",
        "id_field": "Strain AN",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS08"},
        COLUMNS: [
            {
                FIELD: "Strain AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID01"},
                    {TYPE: MISSING, ERROR_CODE: "GID02"},
                    {TYPE: CROSSREF, CROSSREF_NAME: "Strains",
                     ERROR_CODE: "GID03"},
                ]
            },
            {
                FIELD: "Marker",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID04"},
                    {TYPE: MISSING, ERROR_CODE: "GID05"},
                    {TYPE: CHOICES, ERROR_CODE: "GID06",
                     VALUES: ['16S rRNA', 'ACT', 'CaM', 'EF-1α', 'ITS',
                              'LSU', 'RPB1', 'RPB2', 'TUBB']}
                ]
            },
            {
                FIELD: "INSDC AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID07"},
                    {TYPE: MISSING, ERROR_CODE: "GID08"},
                ]
            },
            {
                FIELD: "Sequence",
                VALIDATION: []
            },
        ],
    },
    STRAINS: {
        "acronym": "STD",
        'id_field': 'Accession number',
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
        ROW_VALIDATION: [
            {TYPE: NAGOYA, ERROR_CODE: "STRXXX"},
        ],
        COLUMNS: STRAIN_FIELDS,
    },
    LITERATURE_SHEET: {
        "acronym": "LID",
        'id_field': 'ID',
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
        ROW_VALIDATION: [
            {TYPE: BIBLIO, ERROR_CODE: 'LID17'}
        ],
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID01"},
                    {TYPE: MISSING, ERROR_CODE: "LID02"},
                ]
            },
            {
                FIELD: "Full reference",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID03"},
                ]
            },
            {
                FIELD: "Authors",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID05"},
                ]
            },
            {
                FIELD: "Title",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID07"},
                ]
            },
            {
                FIELD: "Journal",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID09"},
                ]
            },
            {
                FIELD: "Year",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID11"},
                ]
            },
            {
                FIELD: "Volume",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID13"},
                ]
            },
            {
                FIELD: "Issue",
                VALIDATION: []
            },
            {
                FIELD: "First page",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID15"},
                    {TYPE: MISSING, ERROR_CODE: "LID16"},
                ]
            },
            {
                FIELD: "Last page",
                VALIDATION: []
            },
            {
                FIELD: "Book title",
                VALIDATION: []
            },
            {
                FIELD: "Editors",
                VALIDATION: []
            },
            {
                FIELD: "Publisher",
                VALIDATION: []
            }
        ],
    },
    # SEXUAL_STATE_SHEET: {"acronym": "SSD", COLUMNS: []},
    # RESOURCE_TYPES_VALUES: {"acronym": "RTD", COLUMNS: []},
    # FORM_OF_SUPPLY_SHEET: {"acronym": "FSD", COLUMNS: []},
    # PLOIDY_SHEET: {"acronym": "PLD", COLUMNS: []},
    ONTOBIOTOPE: {
        "acronym": "OTD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS06"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD01"},
                    {TYPE: MISSING, ERROR_CODE: "OTD02"},
                ]
            },
            {
                FIELD: "Name",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD03"},
                    {TYPE: MISSING, ERROR_CODE: "OTD04"},
                ]
            },
        ]
    },
    # MARKERS: {
    #     "acronym": "MKD",
    #     "id_field": "",
    #     COLUMNS: [
    #         {
    #             FIELD: "Acronym",
    #             VALIDATION: []
    #         },
    #         {
    #             FIELD: "Marker",
    #             VALIDATION: []
    #         },
    #     ],
    # },
}

CROSS_REF_CONF = {
    ONTOBIOTOPE: ['ID', 'Name'],
    LITERATURE_SHEET: ['ID'],
    LOCATIONS: ['Locality'],
    GROWTH_MEDIA: ['Acronym'],
    STRAINS: ["Accession number"],
    SEXUAL_STATE_SHEET: []
}

MIRRI_20200601_VALLIDATION_CONF = {
    'sheet_schema': SHEETS_SCHEMA,
    'cross_ref_conf': CROSS_REF_CONF,
    'keep_sheets_in_memory': [
        {'sheet_name': LOCATIONS, 'indexed_by': 'Locality'}]
}
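
# Extension sketch (not part of the original file): validate_excel() walks
# SHEETS_SCHEMA sheet by sheet, so checking a new column only needs an entry
# appended to the relevant COLUMNS list, for instance
#     {FIELD: 'My column', VALIDATION: [{TYPE: MISSING, ERROR_CODE: 'XXX01'}]}
# ('My column' and 'XXX01' are hypothetical), plus a matching XXX01() message
# method in the error-codes class.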
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
openpyxl
requests
requests_oauthlib
pycountry
deepdiff
35
setup.py
Normal file
@@ -0,0 +1,35 @@
import setuptools
from pathlib import Path
from setuptools import find_packages

with open("README.md", "r") as fh:
    long_description = fh.read()

requirements = [line.strip() for line in open('requirements.txt')]
scripts = [str(f) for f in Path('./bin').glob('*.py')]

setuptools.setup(
    name="Mirri utils",
    version="0.1",
    author="P.Ziarsolo",
    author_email="pziarsolo@gmail.com",
    description="A small library to help deal with MIRRI data",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/pziarsolo/mirri_utils",
    packages=find_packages(),
    package_data={"mirri": ['data/ontobiotopes.csv']},
    # package_dir={"mirri.entities": "mirri.entities"
    #              "mirri.io.parsers": "mirri.io.parsers",
    #              "mirri.io.writers": "mirri.io.writers",
    #              'mirri.validation': 'mirri.vallidation'},
    install_requires=requirements,
    scripts=scripts,
    license="GNU General Public License v3.0",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
)
0
tests/__init__.py
Normal file
0
tests/biolomics/__init__.py
Normal file
22
tests/biolomics/test_auth_operations.py
Normal file
@@ -0,0 +1,22 @@
import unittest

from mirri.biolomics.remote.rest_client import BiolomicsClient
try:
    from mirri.biolomics.secrets import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
except ImportError:
    raise ImportError(
        'You need a secrets.py in the project dir with CLIENT_ID, SECRET_ID, USERNAME, PASSWORD')

from .utils import VERSION, SERVER_URL


class BiolomicsClientAuthTest(unittest.TestCase):

    def test_authentication(self):
        client = BiolomicsClient(SERVER_URL, VERSION, CLIENT_ID, SECRET_ID,
                                 USERNAME, PASSWORD)
        access1 = client.get_access_token()
        access2 = client.get_access_token()
        self.assertIsNotNone(access1)
        self.assertEqual(access1, access2)
62
tests/biolomics/test_growth_medium_operations.py
Normal file
@@ -0,0 +1,62 @@
import unittest

from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS
from mirri.biolomics.serializers.growth_media import GrowthMedium
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from tests.biolomics.utils import SERVER_URL, VERSION


class BiolomicsGrowthMediumClientTest(unittest.TestCase):
    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_media_by_id(self):
        record_id = 101
        growth_medium = self.client.retrieve_by_id('growth_medium', record_id)
        self.assertEqual(growth_medium.record_id, record_id)

        self.assertEqual(growth_medium.record_name, 'MA2PH6')

    def test_retrieve_media_by_name(self):
        record_name = 'MA2PH6'
        record_id = 101
        growth_medium = self.client.retrieve_by_name('growth_medium', record_name)
        self.assertEqual(growth_medium.record_id, record_id)
        self.assertEqual(growth_medium.record_name, record_name)

    def test_create_growth_media(self):
        self.client.start_transaction()
        try:
            growth_medium = GrowthMedium()
            growth_medium.acronym = 'BBB'
            growth_medium.ingredients = 'alkhdflakhf'
            growth_medium.description = 'desc'

            new_growth_medium = self.client.create(GROWTH_MEDIUM_WS, growth_medium)
            print(new_growth_medium.dict())
        finally:
            self.client.rollback()

    def test_update_growth_media(self):
        self.client.start_transaction()
        try:
            growth_medium = GrowthMedium()
            growth_medium.acronym = 'BBB'
            growth_medium.ingredients = 'alkhdflakhf'
            growth_medium.description = 'desc'
            growth_medium.full_description = 'full'
            new_growth_medium = self.client.create(GROWTH_MEDIUM_WS, growth_medium)

            new_growth_medium.full_description = 'full2'
            updated_gm = self.client.update(GROWTH_MEDIUM_WS, new_growth_medium)
            self.assertEqual(updated_gm.full_description, 'full2')

            retrieved = self.client.retrieve_by_id(GROWTH_MEDIUM_WS, new_growth_medium.record_id)
            self.assertEqual(retrieved.full_description, updated_gm.full_description)

        finally:
            self.client.rollback()
46
tests/biolomics/test_literature_operations.py
Normal file
@@ -0,0 +1,46 @@
import unittest
|
||||||
|
|
||||||
|
from .utils import VERSION, SERVER_URL
|
||||||
|
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
|
||||||
|
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS
|
||||||
|
from mirri.entities.publication import Publication
|
||||||
|
|
||||||
|
|
||||||
|
class BiolomicsLiteratureClientTest(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
|
||||||
|
SECRET_ID, USERNAME, PASSWORD)
|
||||||
|
|
||||||
|
def test_retrieve_biblio_by_id(self):
|
||||||
|
record_id = 100
|
||||||
|
record_name = "Miscellaneous notes on Mucoraceae"
|
||||||
|
biblio = self.client.retrieve_by_id(BIBLIOGRAPHY_WS, record_id)
|
||||||
|
self.assertEqual(biblio.record_id, record_id)
|
||||||
|
|
||||||
|
self.assertEqual(biblio.record_name, record_name)
|
||||||
|
|
||||||
|
def test_retrieve_media_by_id(self):
|
||||||
|
record_id = 100
|
||||||
|
record_name = "Miscellaneous notes on Mucoraceae"
|
||||||
|
biblio = self.client.retrieve_by_name(BIBLIOGRAPHY_WS, record_name)
|
||||||
|
self.assertEqual(biblio.record_id, record_id)
|
||||||
|
self.assertEqual(biblio.record_name, record_name)
|
||||||
|
self.assertEqual(biblio.year, 1994)
|
||||||
|
self.assertEqual(biblio.volume, '50')
|
||||||
|
|
||||||
|
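
    # Cleanup here is explicit rather than transactional: the record created
    # on the remote service is deleted again in the finally block.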
    def test_create_biblio(self):
        pub = Publication()
        pub.pubmed_id = 'PM18192'
        pub.journal = 'my_journal'
        pub.title = 'awesome title'
        pub.authors = 'pasdas, aposjdasd, alsalsfda'
        pub.volume = 'volume 0'
        record_id = None
        try:
            new_pub = self.client.create(BIBLIOGRAPHY_WS, pub)
            record_id = new_pub.record_id
            self.assertEqual(new_pub.title, pub.title)
            self.assertEqual(new_pub.volume, pub.volume)
        finally:
            if record_id is not None:
                self.client.delete_by_id(BIBLIOGRAPHY_WS, record_id)
49
tests/biolomics/test_sequence_operations.py
Normal file
@ -0,0 +1,49 @@
import unittest

from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from .utils import VERSION, SERVER_URL


class BiolomicsSequenceClientTest(unittest.TestCase):
    def setUp(self) -> None:
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_seq_by_id(self):
        record_id = 101
        sequence = self.client.retrieve_by_id('sequence', record_id)

        self.assertEqual(sequence.record_id, record_id)
        self.assertEqual(sequence.record_name, 'MUM 02.54 - CaM')
        self.assertEqual(sequence.marker_type, 'CaM')

    def test_retrieve_seq_by_name(self):
        record_name = 'MUM 02.54 - CaM'
        sequence = self.client.retrieve_by_name('sequence', record_name)

        self.assertEqual(sequence.record_id, 101)
        self.assertEqual(sequence.record_name, record_name)
        self.assertEqual(sequence.marker_type, 'CaM')
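
    # Round trip against the 'sequence' endpoint: create a marker, check the
    # returned record field by field, then delete it by its new record id.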
    def test_create_delete_sequence(self):
        marker = GenomicSequenceBiolomics()
        marker.marker_id = 'GGAAUUA'
        marker.marker_seq = 'aattgacgat'
        marker.marker_type = 'CaM'
        marker.record_name = 'peioMarker'

        new_marker = self.client.create('sequence', marker)
        self.assertEqual(new_marker.marker_id, 'GGAAUUA')
        self.assertEqual(new_marker.marker_seq, 'aattgacgat')
        self.assertEqual(new_marker.marker_type, 'CaM')
        self.assertEqual(new_marker.record_name, 'peioMarker')
        self.assertTrue(new_marker.record_id)

        self.client.delete_by_id('sequence', new_marker.record_id)


if __name__ == "__main__":
    # import sys;sys.argv = ['', 'BiolomicsClient.Test.test_get_strain_by_id']
    unittest.main()
727
tests/biolomics/test_serializers.py
Normal file
@ -0,0 +1,727 @@
import unittest
import pycountry
import deepdiff
from pprint import pprint

from mirri.biolomics.serializers.sequence import (
    GenomicSequenceBiolomics,
    serialize_to_biolomics as sequence_to_biolomics,
    serialize_from_biolomics as sequence_from_biolomics)

from mirri.biolomics.serializers.strain import (
    serialize_to_biolomics as strain_to_biolomics,
    serialize_from_biolomics as strain_from_biolomics)
from mirri.biolomics.serializers.growth_media import (
    # serialize_to_biolomics as growth_medium_to_biolomics,
    serialize_from_biolomics as growth_medium_from_biolomics)
from mirri.biolomics.serializers.bibliography import (
    serializer_from_biolomics as literature_from_biolomics,
    serializer_to_biolomics as literature_to_biolomics
)
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.entities.publication import Publication
from .utils import create_full_data_strain, VERSION, SERVER_URL
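

# A strain record as returned by the Biolomics web service, captured as the
# fixture for the deserialization test below (numeric FieldType codes, record
# links resolved to RecordId entries).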
STRAIN_WS = {
    'CreationDate': '2021-05-19T12:22:33',
    'CreatorUserName': 'pziarsolo@cect.org',
    'LastChangeDate': '2021-05-19T12:22:36',
    'LastChangeUserName': 'pziarsolo@cect.org',
    'RecordDetails': {
        'ABS related files': {'FieldType': 21,
                              'Value': [{'Name': 'link',
                                         'Value': 'https://example.com'}]},
        'Altitude of geographic origin': {'FieldType': 4, 'Value': 121.0},
        'Applications': {'FieldType': 5, 'Value': 'health'},
        'Catalog URL': {'FieldType': 21, 'Value': []},
        'Collection accession number': {'FieldType': 5, 'Value': 'TESTCC 1'},
        'Collection date': {'FieldType': 8, 'Value': '1991/01/01'},
        'Collector': {'FieldType': 5, 'Value': 'the collector'},
        'Comment on taxonomy': {'FieldType': 5, 'Value': 'lalalalla'},
        'Coordinates of geographic origin': {'FieldType': 12,
                                             'Value': {'Altitude': 0.0,
                                                       'Latitude': 23.3,
                                                       'Longitude': 23.3,
                                                       'Precision': 0.0}},
        'Country': {'FieldType': 118,
                    'Value': [{'Name': {'FieldType': 5, 'Value': 'Spain'},
                               'RecordId': 54,
                               'TargetFieldValue': None}]},
        'Data provided by': {'FieldType': 22, 'Value': 'Unknown'},
        'Date of inclusion in the catalogue': {'FieldType': 8,
                                               'Value': '1985/05/02'},
        'Deposit date': {'FieldType': 8, 'Value': '1985/05/02'},
        'Depositor': {'FieldType': 5,
                      'Value': 'NCTC, National Collection of Type Cultures - '
                               'NCTC, London, United Kingdom of Great Britain '
                               'and Northern Ireland.'},
        'Dual use': {'FieldType': 20, 'Value': 'yes'},
        'Enzyme production': {'FieldType': 5, 'Value': 'some enzimes'},
        'Form': {'FieldType': 3,
                 'Value': [{'Name': 'Agar', 'Value': 'yes'},
                           {'Name': 'Cryo', 'Value': 'no'},
                           {'Name': 'Dry Ice', 'Value': 'no'},
                           {'Name': 'Liquid Culture Medium', 'Value': 'no'},
                           {'Name': 'Lyo', 'Value': 'yes'},
                           {'Name': 'Oil', 'Value': 'no'},
                           {'Name': 'Water', 'Value': 'no'}]},
        'GMO': {'FieldType': 22, 'Value': 'Yes'},
        'GMO construction information': {'FieldType': 5,
                                         'Value': 'instructrion to build'},
        'Genotype': {'FieldType': 5, 'Value': 'some genotupe'},
        'Geographic origin': {'FieldType': 5,
                              'Value': 'una state; one municipality; '
                                       'somewhere in the world'},
        'History': {'FieldType': 5, 'Value': 'newer < In the middle < older'},
        'Infrasubspecific names': {'FieldType': 5, 'Value': 'serovar tete'},
        'Interspecific hybrid': {'FieldType': 20, 'Value': 'no'},
        'Isolation date': {'FieldType': 8, 'Value': '1900/01/01'},
        'Isolation habitat': {'FieldType': 5, 'Value': 'some habitat'},
        'Isolator': {'FieldType': 5, 'Value': 'the isolator'},
        'Literature': {'FieldType': 118, 'Value': []},
        'MTA files URL': {'FieldType': 21,
                          'Value': [{'Name': 'link',
                                     'Value': 'https://example.com'}]},
        'MTA text': {'FieldType': 5, 'Value': ''},
        'Metabolites production': {'FieldType': 5,
                                   'Value': 'big factory of cheese'},
        'Mutant information': {'FieldType': 5, 'Value': 'x-men'},
        'Nagoya protocol restrictions and compliance conditions': {
            'FieldType': 20,
            'Value': 'no known restrictions under the Nagoya protocol'},
        'Ontobiotope': {'FieldType': 118,
                        'Value': [{'Name': {'FieldType': 5,
                                            'Value': 'anaerobic bioreactor '
                                                     '(OBT:000190)'},
                                   'RecordId': 100,
                                   'TargetFieldValue': None}]},
        'Ontobiotope term for the isolation habitat': {'FieldType': 5,
                                                       'Value': ''},
        'Orders': {'FieldType': 118, 'Value': []},
        'Organism type': {'FieldType': 3,
                          'Value': [{'Name': 'Algae', 'Value': 'no'},
                                    {'Name': 'Archaea', 'Value': 'yes'},
                                    {'Name': 'Bacteria', 'Value': 'no'},
                                    {'Name': 'Cyanobacteria', 'Value': 'no'},
                                    {'Name': 'Filamentous Fungi', 'Value': 'no'},
                                    {'Name': 'Phage', 'Value': 'no'},
                                    {'Name': 'Plasmid', 'Value': 'no'},
                                    {'Name': 'Virus', 'Value': 'no'},
                                    {'Name': 'Yeast', 'Value': 'no'},
                                    {'Name': 'Microalgae', 'Value': '?'}]},
        'Other culture collection numbers': {'FieldType': 5,
                                             'Value': 'aaa a; aaa3 a3'},
        'Other denomination': {'FieldType': 5, 'Value': ''},
        'Pathogenicity': {'FieldType': 5, 'Value': 'illness'},
        'Plasmids': {'FieldType': 5, 'Value': 'asda'},
        'Plasmids collections fields': {'FieldType': 5, 'Value': 'asdasda'},
        'Ploidy': {'FieldType': 20, 'Value': 'Polyploid'},
        'Quarantine in Europe': {'FieldType': 20, 'Value': 'no'},
        'Recommended growth medium': {'FieldType': 118,
                                      'Value': [{'Name': {'FieldType': 5,
                                                          'Value': 'AAA'},
                                                 'RecordId': 1,
                                                 'TargetFieldValue': None}]},
        'Recommended growth temperature': {'FieldType': 19,
                                           'MaxValue': 30.0,
                                           'MinValue': 30.0},
        'Remarks': {'FieldType': 5, 'Value': 'no remarks for me'},
        'Restrictions on use': {'FieldType': 20,
                                'Value': 'no restriction apply'},
        'Risk group': {'FieldType': 20, 'Value': '1'},
        'Sequences 16s': {'FieldType': 114,
                          'Value': [{'Name': {'FieldType': 5,
                                              'Value': 'X76436'},
                                     'RecordId': 50992,
                                     'TargetFieldValue': {'FieldType': 14,
                                                          'Value': {'Sequence': ''}}}]},
        'Sequences 18S rRNA': {'FieldType': 114, 'Value': []},
        'Sequences 23S rRNA': {'FieldType': 114, 'Value': []},
        'Sequences ACT': {'FieldType': 114, 'Value': []},
        'Sequences AmdS': {'FieldType': 114, 'Value': []},
        'Sequences Amds12': {'FieldType': 114, 'Value': []},
        'Sequences Beta tubulin': {'FieldType': 114, 'Value': []},
        'Sequences COX1': {'FieldType': 114, 'Value': []},
        'Sequences COX2': {'FieldType': 114, 'Value': []},
        'Sequences CaM': {'FieldType': 114, 'Value': []},
        'Sequences Cct8': {'FieldType': 114, 'Value': []},
        'Sequences Cit1': {'FieldType': 114, 'Value': []},
        'Sequences CypA': {'FieldType': 114, 'Value': []},
        'Sequences GDP': {'FieldType': 114, 'Value': []},
        'Sequences GPD': {'FieldType': 114, 'Value': []},
        'Sequences Genome': {'FieldType': 114, 'Value': []},
        'Sequences HIS': {'FieldType': 114, 'Value': []},
        'Sequences HSP': {'FieldType': 114, 'Value': []},
        'Sequences IDH': {'FieldType': 114, 'Value': []},
        'Sequences IGS': {'FieldType': 114, 'Value': []},
        'Sequences ITS': {'FieldType': 114, 'Value': []},
        'Sequences LSU': {'FieldType': 114, 'Value': []},
        'Sequences MAT': {'FieldType': 114, 'Value': []},
        'Sequences MAT1': {'FieldType': 114, 'Value': []},
        'Sequences Miscellaneous': {'FieldType': 114, 'Value': []},
        'Sequences NorA': {'FieldType': 114, 'Value': []},
        'Sequences NorB': {'FieldType': 114, 'Value': []},
        'Sequences Omt12': {'FieldType': 114, 'Value': []},
        'Sequences OmtA': {'FieldType': 114, 'Value': []},
        'Sequences PcCYP': {'FieldType': 114, 'Value': []},
        'Sequences PpgA': {'FieldType': 114, 'Value': []},
        'Sequences PreA': {'FieldType': 114, 'Value': []},
        'Sequences PreB': {'FieldType': 114, 'Value': []},
        'Sequences RAPD': {'FieldType': 114, 'Value': []},
        'Sequences RPB1': {'FieldType': 114, 'Value': []},
        'Sequences RPB2': {'FieldType': 114, 'Value': []},
        'Sequences SSU': {'FieldType': 114, 'Value': []},
        'Sequences TEF1a': {'FieldType': 114, 'Value': []},
        'Sequences TEF2': {'FieldType': 114, 'Value': []},
        'Sequences TUB': {'FieldType': 114, 'Value': []},
        'Sequences Tsr1': {'FieldType': 114, 'Value': []},
        'Sequences c16S rRNA': {'FieldType': 114, 'Value': []},
        'Sequences cbhI': {'FieldType': 114, 'Value': []},
        'Sequences mcm7': {'FieldType': 114, 'Value': []},
        'Sequences rbcL': {'FieldType': 114, 'Value': []},
        'Sexual state': {'FieldType': 5, 'Value': 'MT+A'},
        'Status': {'FieldType': 5,
                   'Value': 'type of Bacillus alcalophilus'},
        'Strain from a registered collection': {'FieldType': 20, 'Value': 'no'},
        'Substrate of isolation': {'FieldType': 5, 'Value': 'some substrate'},
        'Taxon name': {'FieldType': 109,
                       'Value': [{'Name': {'FieldType': 5,
                                           'Value': 'Escherichia coli'},
                                  'RecordId': 100004123,
                                  'TargetFieldValue': {
                                      'DesktopInfo': None,
                                      'DesktopInfoHtml': '<b>Current name: </b>'
                                                         '<i>Escherichia coli</i> '
                                                         '(Migula 1895) Castellani '
                                                         'and Chalmers 1919',
                                      'FieldType': 27,
                                      'NewSynFieldInfo': None,
                                      'ObligateSynonymId': 0,
                                      'OriginalSynFieldInfo': None,
                                      'SynInfo': {
                                          'BasionymRecord': {
                                              'NameInfo': '',
                                              'RecordId': 100004123,
                                              'RecordName': '<i>Escherichia coli</i> '
                                                            '(Migula 1895) Castellani '
                                                            'and Chalmers 1919',
                                              'SecondLevelRecords': None},
                                          'CurrentNameRecord': {
                                              'NameInfo': '',
                                              'RecordId': 100004123,
                                              'RecordName': '<i>Escherichia coli</i> '
                                                            '(Migula 1895) Castellani '
                                                            'and Chalmers 1919',
                                              'SecondLevelRecords': None},
                                          'ObligateSynonymRecords': [],
                                          'SelectedRecord': {
                                              'NameInfo': '<i>Escherichia coli</i> '
                                                          '(Migula 1895) Castellani '
                                                          'and Chalmers 1919',
                                              'RecordId': 100004123,
                                              'RecordName': '<i>Escherichia coli</i> '
                                                            '(Migula 1895) Castellani '
                                                            'and Chalmers 1919',
                                              'SecondLevelRecords': None},
                                          'TaxonSynonymsRecords': []},
                                      'SynonymId': 100004123}}]},
        'Tested temperature growth range': {'FieldType': 19,
                                            'MaxValue': 32.0,
                                            'MinValue': 29.0},
        'Type description': {'FieldType': 5, 'Value': ''}},
    'RecordId': 148038,
    'RecordName': 'MIRRI 2240561'}
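

# Expected output of strain_to_biolomics when no client is passed: FieldType
# values are the serializer's symbolic codes, and record-link fields that need
# a remote lookup (Country, Literature, the Sequences fields) are absent.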
STRAIN_WS_EXPECTED_NO_REMOTE = {
    'Acronym': 'MIRRI',
    'RecordDetails': {
        'ABS related files': {'FieldType': 'U',
                              'Value': [{'Name': 'link',
                                         'Value': 'https://example.com'}]},
        'Altitude of geographic origin': {'FieldType': 'D', 'Value': 121},
        'Applications': {'FieldType': 'E', 'Value': 'health'},
        'Collection accession number': {'FieldType': 'E', 'Value': 'TESTCC 1'},
        'Collection date': {'FieldType': 'H', 'Value': '1991-01-01'},
        'Collector': {'FieldType': 'E', 'Value': 'the collector'},
        'Comment on taxonomy': {'FieldType': 'E', 'Value': 'lalalalla'},
        'Coordinates of geographic origin': {'FieldType': 'L',
                                             'Value': {'Latitude': 23.3,
                                                       'Longitude': 23.3}},
        'Date of inclusion in the catalogue': {'FieldType': 'H',
                                               'Value': '1985-05-02'},
        'Deposit date': {'FieldType': 'H', 'Value': '1985-05-02'},
        'Depositor': {'FieldType': 'E',
                      'Value': 'NCTC, National Collection of Type Cultures - '
                               'NCTC, London, United Kingdom of Great Britain '
                               'and Northern Ireland.'},
        'Dual use': {'FieldType': 'T', 'Value': 'yes'},
        'Enzyme production': {'FieldType': 'E', 'Value': 'some enzimes'},
        'Form': {'FieldType': 'C',
                 'Value': [{'Name': 'Agar', 'Value': 'yes'},
                           {'Name': 'Cryo', 'Value': 'no'},
                           {'Name': 'Dry Ice', 'Value': 'no'},
                           {'Name': 'Liquid Culture Medium', 'Value': 'no'},
                           {'Name': 'Lyo', 'Value': 'yes'},
                           {'Name': 'Oil', 'Value': 'no'},
                           {'Name': 'Water', 'Value': 'no'}]},
        'GMO': {'FieldType': 'V', 'Value': 'Yes'},
        'GMO construction information': {'FieldType': 'E',
                                         'Value': 'instructrion to build'},
        'Genotype': {'FieldType': 'E', 'Value': 'some genotupe'},
        'Geographic origin': {'FieldType': 'E',
                              'Value': 'una state; one municipality; '
                                       'somewhere in the world'},
        'History': {'FieldType': 'E',
                    'Value': 'firstplave < seconn place < third place'},
        'Infrasubspecific names': {'FieldType': 'E', 'Value': 'serovar tete'},
        'Interspecific hybrid': {'FieldType': 'T', 'Value': 'no'},
        'Isolation date': {'FieldType': 'H', 'Value': '1900-01-01'},
        'Isolation habitat': {'FieldType': 'E', 'Value': 'some habitat'},
        'Isolator': {'FieldType': 'E', 'Value': 'the isolator'},
        'MTA files URL': {'FieldType': 'U',
                          'Value': [{'Name': 'link',
                                     'Value': 'https://example.com'}]},
        'Metabolites production': {'FieldType': 'E',
                                   'Value': 'big factory of cheese'},
        'Mutant information': {'FieldType': 'E', 'Value': 'x-men'},
        'Nagoya protocol restrictions and compliance conditions': {
            'FieldType': 'T',
            'Value': 'no known restrictions under the Nagoya protocol'},
        'Ontobiotope': {'FieldType': 'RLink', 'Value': 'OBT:000190'},
        'Organism type': {'FieldType': 'C',
                          'Value': [{'Name': 'Algae', 'Value': 'no'},
                                    {'Name': 'Archaea', 'Value': 'yes'},
                                    {'Name': 'Bacteria', 'Value': 'no'},
                                    {'Name': 'Cyanobacteria', 'Value': 'no'},
                                    {'Name': 'Filamentous Fungi', 'Value': 'no'},
                                    {'Name': 'Phage', 'Value': 'no'},
                                    {'Name': 'Plasmid', 'Value': 'no'},
                                    {'Name': 'Virus', 'Value': 'no'},
                                    {'Name': 'Yeast', 'Value': 'no'}]},
        'Other culture collection numbers': {'FieldType': 'E',
                                             'Value': 'aaa a; aaa3 a3'},
        'Pathogenicity': {'FieldType': 'E', 'Value': 'illness'},
        'Plasmids': {'FieldType': 'E', 'Value': 'asda'},
        'Plasmids collections fields': {'FieldType': 'E', 'Value': 'asdasda'},
        'Ploidy': {'FieldType': 'T', 'Value': 'Polyploid'},
        'Quarantine in Europe': {'FieldType': 'T', 'Value': 'no'},
        'Recommended growth temperature': {'FieldType': 'S',
                                           'MaxValue': 30.0,
                                           'MinValue': 30.0},
        'Remarks': {'FieldType': 'E', 'Value': 'no remarks for me'},
        'Restrictions on use': {'FieldType': 'T',
                                'Value': 'no restriction apply'},
        'Risk group': {'FieldType': 'T', 'Value': '1'},
        'Sexual state': {'FieldType': 'E', 'Value': 'MT+A'},
        'Status': {'FieldType': 'E',
                   'Value': 'type of Bacillus alcalophilus'},
        'Strain from a registered collection': {'FieldType': 'T', 'Value': 'no'},
        'Substrate of isolation': {'FieldType': 'E', 'Value': 'some substrate'},
        'Taxon name': {'FieldType': 'SynLink', 'Value': 'Escherichia coli'},
        'Tested temperature growth range': {'FieldType': 'S',
                                            'MaxValue': 32.0,
                                            'MinValue': 29.0}}}

class StrainSerializerTest(unittest.TestCase):

    def test_serialize_to_biolomics(self):
        strain = create_full_data_strain()
        ws_strain = strain_to_biolomics(strain, client=None)
        self.assertDictEqual(ws_strain, STRAIN_WS_EXPECTED_NO_REMOTE)

    def test_serialize_to_biolomics_remote(self):
        client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                      SECRET_ID, USERNAME, PASSWORD)
        strain = create_full_data_strain()
        marker = GenomicSequenceBiolomics()
        marker.marker_id = "MUM 02.15 - Beta tubulin"
        marker.marker_type = 'TUBB'
        strain.genetics.markers = [marker]
        ws_strain = strain_to_biolomics(strain, client=client)

        self.assertEqual(strain.collect.habitat_ontobiotope,
                         ws_strain['RecordDetails']['Ontobiotope']['Value'][0]['Name']['Value'])
        self.assertEqual(pycountry.countries.get(alpha_3=strain.collect.location.country).name,
                         ws_strain['RecordDetails']['Country']['Value'][0]['Name']['Value'])
        self.assertEqual(strain.publications[0].title,
                         ws_strain['RecordDetails']['Literature']['Value'][0]['Name']['Value'])
        self.assertEqual(strain.genetics.markers[0].marker_id,
                         ws_strain['RecordDetails']['Sequences TUB']['Value'][0]['Name']['Value'])

    def test_serialize_from_biolomics(self):
        ws_strain = STRAIN_WS
        strain = strain_from_biolomics(ws_strain)
        self.assertEqual(strain.record_id, 148038)
        self.assertEqual(strain.record_name, 'MIRRI 2240561')
        self.assertEqual(strain.taxonomy.long_name, 'Escherichia coli')
        self.assertEqual(strain.growth.recommended_media, ['AAA'])
        self.assertEqual(strain.collect.location.altitude, 121)
        self.assertEqual(strain.collect.location.country, 'ESP')
        self.assertEqual(strain.applications, 'health')
        self.assertEqual(strain.id.strain_id, 'TESTCC 1')
        self.assertEqual(strain.collect.date.strfdate, '19910101')
        self.assertEqual(strain.taxonomy.comments, 'lalalalla')
        self.assertEqual(strain.catalog_inclusion_date.strfdate, '19850502')
        self.assertIn('NCTC, National Collection of Type ', strain.deposit.who)
        self.assertTrue(strain.is_potentially_harmful)
        self.assertEqual(strain.form_of_supply, ['Agar', 'Lyo'])
        self.assertTrue(strain.genetics.gmo)
        self.assertEqual(strain.genetics.gmo_construction, 'instructrion to build')
        self.assertEqual(strain.genetics.genotype, 'some genotupe')
        self.assertEqual(strain.history, ['newer', 'In the middle', 'older'])
        self.assertEqual(strain.taxonomy.infrasubspecific_name, 'serovar tete')
        self.assertEqual(strain.isolation.who, 'the isolator')
        self.assertEqual(strain.isolation.date.strfdate, '19000101')
        self.assertEqual(strain.mta_files, ['https://example.com'])
        self.assertEqual(strain.genetics.mutant_info, 'x-men')
        self.assertEqual(strain.collect.habitat_ontobiotope, 'OBT:000190')
        self.assertEqual(strain.taxonomy.organism_type[0].name, 'Archaea')
        self.assertEqual(strain.other_numbers[0].strain_id, 'aaa a')
        self.assertEqual(strain.other_numbers[1].strain_id, 'aaa3 a3')
        self.assertEqual(strain.pathogenicity, 'illness')
        self.assertEqual(strain.genetics.plasmids, ['asda'])
        self.assertEqual(strain.genetics.ploidy, 9)
        self.assertFalse(strain.is_subject_to_quarantine)
        self.assertEqual(strain.risk_group, '1')
        self.assertFalse(strain.is_from_registered_collection)
        self.assertEqual(strain.growth.tested_temp_range, {'min': 29, 'max': 32})
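

# A genomic-sequence record as returned by the web service; the serializer
# maps 'INSDC number', 'Marker name' and the nested 'DNA sequence' onto the
# marker attributes checked below.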
BIOLOMICSSEQ = {
    'RecordDetails': {
        'Barcode level': {'FieldType': 20, 'Value': 'undefined'},
        'DNA extract number': {'FieldType': 5, 'Value': ''},
        'DNA sequence': {'FieldType': 14,
                         'Value': {'Sequence': 'caaaggaggccttctccctcttcgtaag'}},
        'Editing state': {'FieldType': 20, 'Value': 'Auto import'},
        'Forward primer(s)': {'FieldType': 5, 'Value': ''},
        'Genbank': {'FieldType': 21, 'Value': []},
        'INSDC number': {'FieldType': 5, 'Value': 'AATGAT'},
        'Literature': {'FieldType': 21, 'Value': []},
        'Literature1': {'FieldType': 118, 'Value': []},
        'Marker name': {'FieldType': 5, 'Value': 'CaM'},
        'Privacy': {'FieldType': 20, 'Value': 'undefined'},
        'Quality': {'FieldType': 5, 'Value': ''},
        'Remarks': {'FieldType': 5, 'Value': ''},
        'Reverse primer(s)': {'FieldType': 5, 'Value': ''},
        'Review state': {'FieldType': 5, 'Value': ''},
        'Strain number': {'FieldType': 5, 'Value': 'MUM 02.54'}},
    'RecordId': 101,
    'RecordName': 'MUM 02.54 - CaM'}

class SequenceSerializerTest(unittest.TestCase):

    def test_from_biolomics(self):
        marker = sequence_from_biolomics(BIOLOMICSSEQ)
        self.assertEqual(marker.record_name, BIOLOMICSSEQ['RecordName'])
        self.assertEqual(marker.record_id, BIOLOMICSSEQ['RecordId'])
        self.assertEqual(marker.marker_type, BIOLOMICSSEQ['RecordDetails']['Marker name']['Value'])
        self.assertEqual(marker.marker_id, BIOLOMICSSEQ['RecordDetails']['INSDC number']['Value'])
        self.assertEqual(marker.marker_seq, BIOLOMICSSEQ['RecordDetails']['DNA sequence']['Value']['Sequence'])

    def test_to_biolomics(self):
        marker = GenomicSequenceBiolomics()
        marker.marker_id = 'GGAAUUA'
        marker.marker_seq = 'aattgacgat'
        marker.marker_type = 'CaM'
        marker.record_name = 'peioMarker'
        marker.record_id = 111
        ws_seq = sequence_to_biolomics(marker)
        expected = {'RecordId': marker.record_id,
                    'RecordName': marker.record_name,
                    'RecordDetails': {
                        'INSDC number': {'Value': marker.marker_id, 'FieldType': 'E'},
                        'DNA sequence': {'Value': {'Sequence': marker.marker_seq}, 'FieldType': 'N'},
                        'Marker name': {'Value': marker.marker_type, 'FieldType': 'E'}}}

        self.assertEqual(ws_seq, expected)
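

# A growth-medium record as returned by the web service; the Ingredients
# value keeps the raw tab/newline formatting of the original entry.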
BIOLOMICS_MEDIUM = {
    "RecordId": 100,
    "RecordName": "MA20S",
    "RecordDetails": {
        "Full description": {"Value": "mout agar+20% saccharose",
                             "FieldType": 5},
        "Ingredients": {"Value": "Malt extract\r\n\tDilute brewery malt with water to 10% sugar solution (level 10 on Brix saccharose meter), 15 minutes at 121 C\r\nsaccharose\t200g\r\ndistilled water\t0.6l\r\nagar\t15g\r\n",
                        "FieldType": 5},
        "Link to full description": {"Value": [], "FieldType": 21},
        "Medium description": {"Value": "", "FieldType": 5},
        "Other name": {"Value": "", "FieldType": 5},
        "pH": {"Value": "7 with KOH", "FieldType": 5},
        "Remarks": {"Value": "", "FieldType": 5},
        "Reference": {"Value": "", "FieldType": 5},
        "Sterilization conditions": {"Value": "15 minutes at 121 C",
                                     "FieldType": 5}
    }
}


class MediumSerializerTest(unittest.TestCase):
    def test_from_biolomics(self):
        medium = growth_medium_from_biolomics(BIOLOMICS_MEDIUM)
        self.assertEqual(medium.record_id, BIOLOMICS_MEDIUM['RecordId'])
        self.assertEqual(medium.record_name, BIOLOMICS_MEDIUM['RecordName'])
        self.assertEqual(medium.ingredients, BIOLOMICS_MEDIUM['RecordDetails']['Ingredients']['Value'])
        self.assertEqual(medium.full_description, BIOLOMICS_MEDIUM['RecordDetails']['Full description']['Value'])
        self.assertEqual(medium.ph, BIOLOMICS_MEDIUM['RecordDetails']['pH']['Value'])
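

# A bibliography record as returned by the web service; most text fields are
# empty in this fixture, and the deserialization test only checks the
# populated ones.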
BIOLOMICS_BIBLIOGRAPHY = {
    "RecordId": 100,
    "RecordName": "Miscellaneous notes on Mucoraceae",
    "RecordDetails": {
        "Associated strains": {"Value": [], "FieldType": 118},
        "Associated taxa": {"Value": [], "FieldType": 118},
        "Authors": {"Value": "Schipper, M.A.A.; Samson, R.A.",
                    "FieldType": 5},
        "Associated sequences": {"Value": [], "FieldType": 118},
        "Abstract": {"Value": "", "FieldType": 5},
        "Collection": {"Value": "", "FieldType": 5},
        "DOI number": {"Value": "", "FieldType": 5},
        "Editor(s)": {"Value": "", "FieldType": 5},
        "Full reference": {"Value": "", "FieldType": 5},
        "Hyperlink": {"Value": [], "FieldType": 21},
        "ISBN": {"Value": "", "FieldType": 5},
        "ISSN": {"Value": "", "FieldType": 5},
        "Issue": {"Value": "", "FieldType": 5},
        "Journal": {"Value": "Mycotaxon", "FieldType": 5},
        "Journal-Book": {"Value": "", "FieldType": 5},
        "Keywords": {"Value": "", "FieldType": 5},
        "Page from": {"Value": "475", "FieldType": 5},
        "Page to": {"Value": "491", "FieldType": 5},
        "Publisher": {"Value": "", "FieldType": 5},
        "PubMed ID": {"Value": "", "FieldType": 5},
        "Volume": {"Value": "50", "FieldType": 5},
        "Year": {"Value": 1994, "FieldType": 4}
    }
}


class BibliographySerializerTest(unittest.TestCase):
    def test_from_biolomics(self):
        pub = literature_from_biolomics(BIOLOMICS_BIBLIOGRAPHY)
        self.assertEqual(pub.record_name, "Miscellaneous notes on Mucoraceae")
        self.assertEqual(pub.record_id, 100)
        self.assertEqual(pub.year, 1994)
        self.assertEqual(pub.authors, "Schipper, M.A.A.; Samson, R.A.")

    def test_to_biolomics(self):
        pub = Publication()
        pub.title = 'My title'
        pub.year = 1992
        pub.authors = 'me and myself'
        pub.pubmed_id = '1112222'
        pub.issue = 'issue'
        ws_data = literature_to_biolomics(pub)
        expected = {
            'RecordDetails': {
                'Authors': {'FieldType': 'E', 'Value': 'me and myself'},
                'PubMed ID': {'FieldType': 'E', 'Value': '1112222'},
                'Issue': {'FieldType': 'E', 'Value': 'issue'},
                'Year': {'FieldType': 'D', 'Value': 1992}},
            'RecordName': 'My title'}
        self.assertDictEqual(expected, ws_data)
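
    # With no title set, the serializer is expected to fall back to a
    # RecordName derived from the PubMed id or, failing that, the DOI.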
    def test_to_biolomics2(self):
        pub = Publication()
        pub.pubmed_id = '1112222'
        ws_data = literature_to_biolomics(pub)
        expected = {
            'RecordDetails': {
                'PubMed ID': {'FieldType': 'E', 'Value': '1112222'}},
            'RecordName': f'PUBMED:{pub.pubmed_id}'}
        self.assertDictEqual(expected, ws_data)

        pub = Publication()
        pub.doi = 'doi.er/111/12131'
        ws_data = literature_to_biolomics(pub)
        expected = {
            'RecordDetails': {
                'DOI number': {'FieldType': 'E', 'Value': pub.doi}},
            'RecordName': f'DOI:{pub.doi}'}
        self.assertDictEqual(expected, ws_data)


if __name__ == "__main__":
    # import sys; sys.argv = ['', 'BibliographySerializerTest']
    unittest.main()
156
tests/biolomics/test_strain_operations.py
Normal file
@ -0,0 +1,156 @@
import unittest

from mirri.biolomics.remote.endoint_names import STRAIN_WS
from .utils import VERSION, SERVER_URL, create_full_data_strain
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.pipelines.strain import retrieve_strain_by_accession_number


class BiolomicsStrainClientTest(unittest.TestCase):
    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_strain_by_id(self):
        record_id = 14803
        strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
        self.assertEqual(strain.record_id, record_id)
        print(strain.record_name)

    def test_retrieve_strain_by_name(self):
        record_id = 14803
        record_name = 'MIRRI0014803'
        strain = self.client.retrieve_by_name(STRAIN_WS, record_name)
        self.assertEqual(strain.record_name, record_name)
        self.assertEqual(strain.record_id, record_id)
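
    # Searches use the raw Biolomics query syntax: a list of numbered clauses
    # combined by the "Expression" field (a single exact-match clause, "Q0",
    # in the tests below).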
    def test_search_strain(self):
        accession_number = "BEA 0014B"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)

        self.assertEqual(search_response['total'], 1)
        self.assertEqual(search_response['records'][0].id.strain_id,
                         accession_number)

    def test_search_strain4(self):
        accession_number = "TESTCC 1"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)
        for strain in search_response['records']:
            print(strain)
            self.client.delete_by_id(STRAIN_WS, strain.record_id)

    def test_search_strain_no_found(self):
        accession_number = "BEA 0014B_"
        query = {"Query": [{"Index": 0,
                            "FieldName": "Collection accession number",
                            "Operation": "TextExactMatch",
                            "Value": accession_number}],
                 "Expression": "Q0",
                 "DisplayStart": 0,
                 "DisplayLength": 10}

        search_response = self.client.search(STRAIN_WS, query)

        self.assertEqual(search_response['total'], 0)
        self.assertFalse(search_response['records'])

    def test_create_strain(self):
        strain = create_full_data_strain()
        strain.taxonomy.interspecific_hybrid = None
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertIsNone(new_strain.taxonomy.interspecific_hybrid)
            self.assertEqual(new_strain.growth.recommended_media, ['AAA'])
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
        finally:
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_update_strain(self):
        strain = create_full_data_strain()
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
            self.assertFalse(new_strain.taxonomy.interspecific_hybrid)
            new_strain.id.number = '2'
            new_strain.taxonomy.interspecific_hybrid = None
            updated_strain = self.client.update(STRAIN_WS, new_strain)
            self.assertEqual(updated_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(updated_strain.taxonomy.interspecific_hybrid)

            retrieved_strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
            self.assertEqual(retrieved_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(retrieved_strain.taxonomy.interspecific_hybrid)
        finally:
            if record_id is not None:
                print('deleting')
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_update_strain_pathogenicity(self):
        strain = create_full_data_strain()
        print(strain.pathogenicity)
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
            self.assertEqual(new_strain.pathogenicity, 'illness')

            new_strain.pathogenicity = None
            updated_strain = self.client.update(STRAIN_WS, new_strain)
            self.assertEqual(updated_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(updated_strain.pathogenicity)

            retrieved_strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
            self.assertEqual(retrieved_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(retrieved_strain.pathogenicity)
        finally:
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_search_by_accession_number(self):
        accession_number = "BEA 0014B"
        strain = retrieve_strain_by_accession_number(self.client, accession_number)
        self.assertEqual(strain.id.strain_id, accession_number)

    def test_search_by_accession_number_not_found(self):
        accession_number = "BEA 0014B_"
        strain = retrieve_strain_by_accession_number(self.client, accession_number)
        self.assertFalse(strain)


class BiolomicsClientGrowthMediaTest(unittest.TestCase):
    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def xtest_growth_media_by_name(self):
        gm = self.client.retrieve('growth_media', 'AAA')
        self.assertEqual(gm['Record Id'], 1)


if __name__ == "__main__":
    # import sys;sys.argv = ['',
    #                        'BiolomicsWriter.test_mirri_excel_parser_invalid']
    unittest.main()
99
tests/biolomics/utils.py
Normal file
@ -0,0 +1,99 @@
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.strain import StrainId, OrganismType
from mirri.entities.sequence import GenomicSequence
from mirri.entities.date_range import DateRange
from mirri.entities.publication import Publication
from mirri.settings import NAGOYA_NO_RESTRICTIONS

VERSION = 'v2'
SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
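

# Builds a StrainMirri populated with a value for every field the serializer
# and client tests exercise; the string values are deliberately throwaway
# test data.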
def create_full_data_strain():
    strain = StrainMirri()

    strain.id.number = "1"
    strain.id.collection = "TESTCC"
    strain.id.url = "https://cect/2342"

    strain.restriction_on_use = "no_restriction"
    strain.nagoya_protocol = NAGOYA_NO_RESTRICTIONS
    strain.abs_related_files = ['https://example.com']
    strain.mta_files = ['https://example.com']
    strain.other_numbers.append(StrainId(collection="aaa", number="a"))
    strain.other_numbers.append(StrainId(collection="aaa3", number="a3"))
    strain.is_from_registered_collection = False
    strain.risk_group = '1'
    strain.is_potentially_harmful = True
    strain.is_subject_to_quarantine = False

    strain.taxonomy.organism_type = [OrganismType(2)]
    strain.taxonomy.genus = 'Escherichia'
    strain.taxonomy.species = 'coli'
    strain.taxonomy.interspecific_hybrid = False
    strain.taxonomy.infrasubspecific_name = 'serovar tete'
    strain.taxonomy.comments = 'lalalalla'

    strain.status = "type of Bacillus alcalophilus"
    strain.history = 'firstplave < seconn place < third place'

    strain.deposit.who = "NCTC, National Collection of Type Cultures - NCTC, London, United Kingdom of Great Britain and Northern Ireland."
    strain.deposit.date = DateRange(year=1985, month=5, day=2)
    strain.catalog_inclusion_date = DateRange(year=1985, month=5, day=2)

    strain.collect.location.country = "ESP"
    strain.collect.location.state = "una state"
    strain.collect.location.municipality = "one municipality"
    strain.collect.location.longitude = 23.3
    strain.collect.location.latitude = 23.3
    strain.collect.location.altitude = 121
    strain.collect.location.site = "somewhere in the world"
    strain.collect.habitat_ontobiotope = "OBT:000190"
    strain.collect.habitat = 'some habitat'
    strain.collect.who = "the collector"
    strain.collect.date = DateRange(year=1991)

    strain.isolation.date = DateRange(year=1900)
    strain.isolation.who = 'the isolator'
    strain.isolation.substrate_host_of_isolation = 'some substrate'

    # already existing media in test_mirri
    strain.growth.recommended_temp = {'min': 30, 'max': 30}
    strain.growth.recommended_media = ["AAA"]
    strain.growth.tested_temp_range = {'min': 29, 'max': 32}

    strain.form_of_supply = ["Agar", "Lyo"]

    # strain.other_denominations = ["lajdflasjdldj"]

    gen_seq = GenomicSequence()
    gen_seq.marker_id = "pepe"
    gen_seq.marker_type = "16S rRNA"
    strain.genetics.markers.append(gen_seq)
    strain.genetics.ploidy = 9
    strain.genetics.genotype = 'some genotupe'
    strain.genetics.gmo = True
    strain.genetics.gmo_construction = 'instructrion to build'
    strain.genetics.mutant_info = 'x-men'
    strain.genetics.sexual_state = 'MT+A'
    strain.genetics.plasmids = ['asda']
    strain.genetics.plasmids_in_collections = ['asdasda']

    pub = Publication()
    pub.title = "The genus Amylomyces"
    strain.publications = [pub]

    strain.plant_pathogenicity_code = 'PATH:001'
    strain.pathogenicity = 'illness'
    strain.enzyme_production = 'some enzimes'
    strain.production_of_metabolites = 'big factory of cheese'
    strain.applications = 'health'

    strain.remarks = 'no remarks for me'
    return strain


if __name__ == '__main__':
    strain = create_full_data_strain()
    print(strain.collect.habitat_ontobiotope)
BIN
tests/data/invalid_content.mirri.xlsx
Normal file
Binary file not shown.
5
tests/data/invalid_excel.mirri.json
Normal file
@ -0,0 +1,5 @@
{
    "key1": "value1",
    "key2": "value2",
    "key3": "value3"
}
BIN
tests/data/invalid_structure.mirri.xlsx
Normal file
Binary file not shown.
BIN
tests/data/valid.mirri.full.xlsx
Normal file
Binary file not shown.
BIN
tests/data/valid.mirri.xlsx
Normal file
Binary file not shown.
318
tests/test_entities.py
Normal file
@ -0,0 +1,318 @@
"""
Created on Dec. 2, 2020

@author: peio
"""

import unittest

from mirri.entities.publication import Publication
from mirri.entities.date_range import DateRange
from mirri.entities.location import Location
from mirri.entities.sequence import GenomicSequence
from mirri.entities.strain import (
    Collect,
    Deposit,
    Isolation,
    ValidationError,
    OrganismType,
    Strain,
    StrainId,
    Taxonomy,
)
from mirri.settings import (
    COLLECT,
    COUNTRY,
    DATE_OF_ISOLATION,
    DEPOSIT,
    DEPOSITOR,
    GENETICS,
    GROWTH,
    ISOLATED_BY,
    ISOLATION,
    LOCATION,
    MARKERS,
    NAGOYA_DOCS_AVAILABLE,
    NAGOYA_PROTOCOL,
    ORGANISM_TYPE,
    OTHER_CULTURE_NUMBERS,
    PLOIDY,
    RECOMMENDED_GROWTH_MEDIUM,
    TAXONOMY,
    DATE_OF_INCLUSION, NO_RESTRICTION
)
from mirri.validation.entity_validators import validate_strain
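

# DateRange serializes partial dates as fixed-width 8-character strings, with
# '-' padding the unknown month/day positions (e.g. "2012----", "201212--").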
class TestDateRange(unittest.TestCase):
    def test_data_range_init(self):
        dr = DateRange()

        self.assertFalse(dr)

        self.assertEqual(dr.__str__(), "")
        self.assertEqual(dr.range["start"], None)
        self.assertEqual(dr.range["end"], None)

        dr.strpdate("2012")
        self.assertEqual(dr.strfdate, "2012----")
        self.assertTrue(dr)

        dr.strpdate("2012----")
        self.assertEqual(dr.strfdate, "2012----")

        dr.strpdate("201212--")
        self.assertEqual(dr.strfdate, "201212--")
        try:
            dr.strpdate("201213--")
            self.fail()
        except ValueError:
            pass

        try:
            dr = DateRange(year=2012, month=13)
            self.fail()
        except ValueError:
            pass

        dr = DateRange(year=2020)
        self.assertEqual(dr.strfdate, "2020----")

        dr2 = dr.strpdate("2012")
        self.assertEqual(dr2.range["start"].year, 2012)
        self.assertEqual(dr2.range["start"].month, 1)
        self.assertEqual(dr2.range["start"].day, 1)

        self.assertEqual(dr2.range["end"].year, 2012)
        self.assertEqual(dr2.range["end"].month, 12)
        self.assertEqual(dr2.range["end"].day, 31)


class TestCollect(unittest.TestCase):
    def test_collect_basic(self):
        collect = Collect()
        self.assertEqual(collect.dict(), {})

        collect.location.country = "ESP"
        collect.date = DateRange().strpdate("2012----")

        collect.who = "pepito"
        self.assertEqual(
            dict(collect.dict()),
            {
                "location": {"countryOfOriginCode": "ESP"},
                "collected_by": "pepito",
                "date_of_collection": "2012----",
            },
        )
        self.assertEqual(collect.__str__(),
                         "Collected: Spain in 2012---- by pepito")


class TestOrganismType(unittest.TestCase):
    def test_basic_usage(self):
        org_type = OrganismType(2)
        self.assertEqual(org_type.name, "Archaea")
        self.assertEqual(org_type.code, 2)
        try:
            org_type.ko = 'a'
            self.fail()
        except TypeError:
            pass

        org_type = OrganismType("Archaea")


class TestTaxonomy(unittest.TestCase):
    def test_taxonomy_basic(self):
        taxonomy = Taxonomy()
        self.assertEqual(taxonomy.dict(), {})
        self.assertFalse(taxonomy)

    def test_taxonomy_with_data(self):
        taxonomy = Taxonomy()
        taxonomy.genus = "Bacilus"
        taxonomy.organism_type = [OrganismType("Archaea")]
        taxonomy.species = "vulgaris"
        self.assertEqual(taxonomy.long_name, "Bacilus vulgaris")

        # print(taxonomy.dict())


class TestLocation(unittest.TestCase):
    def test_empty_init(self):
        loc = Location()
        self.assertEqual(loc.dict(), {})
        self.assertFalse(loc)

    def test_add_data(self):
        loc = Location()
        loc.country = "esp"
        self.assertEqual(loc.dict(), {COUNTRY: "esp"})
        loc.state = None
        self.assertEqual(loc.dict(), {COUNTRY: "esp"})


class TestStrain(unittest.TestCase):
    def test_empty_strain(self):
        strain = Strain()
        self.assertEqual(strain.dict(), {})

    def test_strain_add_data(self):
        strain = Strain()

        strain.id.number = "5433"
        strain.id.collection = "CECT"
        strain.id.url = "https://cect/2342"

        try:
            strain.nagoya_protocol = "asdas"
            self.fail()
        except ValidationError:
            pass

        strain.nagoya_protocol = NAGOYA_DOCS_AVAILABLE
        self.assertEqual(strain.dict()[NAGOYA_PROTOCOL], NAGOYA_DOCS_AVAILABLE)

        strain.collect.location.country = "ESP"

        self.assertEqual(strain.dict()[COLLECT][LOCATION][COUNTRY], "ESP")

        strain.genetics.ploidy = 9
        self.assertEqual(strain.dict()[GENETICS][PLOIDY], 9)

        strain.growth.recommended_media = ["asd"]
        strain.isolation.date = DateRange(year=1900)
        self.assertEqual(strain.dict()[ISOLATION][DATE_OF_ISOLATION], "1900----")

        strain.deposit.who = "pepe"
        self.assertEqual(strain.dict()[DEPOSIT][DEPOSITOR], "pepe")

        strain.growth.recommended_media = ["11"]
        self.assertEqual(strain.dict()[GROWTH][RECOMMENDED_GROWTH_MEDIUM], ["11"])

        strain.taxonomy.organism_type = [OrganismType(2)]
        self.assertEqual(
            strain.dict()[TAXONOMY][ORGANISM_TYPE],
            [{"code": 2, "name": "Archaea"}]
        )

        strain.taxonomy.organism_type = [OrganismType("Algae")]
        self.assertEqual(
            strain.dict()[TAXONOMY][ORGANISM_TYPE],
            [{"code": 1, "name": "Algae"}]
        )

        strain.other_numbers.append(StrainId(collection="aaa", number="a"))
        strain.other_numbers.append(StrainId(collection="aaa3", number="a3"))
        self.assertEqual(
            strain.dict()[OTHER_CULTURE_NUMBERS],
            [
                {"collection_code": "aaa", "accession_number": "a"},
                {"collection_code": "aaa3", "accession_number": "a3"},
            ],
        )
        strain.form_of_supply = ["Agar", "Lyo"]
        gen_seq = GenomicSequence()
        self.assertEqual(gen_seq.dict(), {})
        gen_seq.marker_id = "pepe"
        gen_seq.marker_type = "16S rRNA"
        strain.genetics.markers.append(gen_seq)
        self.assertEqual(
            strain.dict()[GENETICS][MARKERS],
            [{"marker_type": "16S rRNA", "INSDC": "pepe"}],
        )

        strain.collect.habitat_ontobiotope = "OBT:111111"
        self.assertEqual(strain.collect.habitat_ontobiotope, "OBT:111111")

        try:
            strain.collect.habitat_ontobiotope = "OBT:11111"
            self.fail()
        except ValidationError:
            pass

        # publications
        try:
            strain.publications = 1
            self.fail()
        except ValidationError:
            pass
        pub = Publication()
        pub.id = "1"
        try:
            strain.publications = pub
            self.fail()
        except ValidationError:
            pass

        strain.publications = [pub]
        self.assertEqual(strain.publications[0].id, "1")

        strain.catalog_inclusion_date = DateRange(year=1992)
        self.assertEqual(strain.dict()[DATE_OF_INCLUSION], '1992----')

        import pprint

        pprint.pprint(strain.dict())

    def test_strain_validation(self):
        strain = Strain()
        strain.form_of_supply = ['Lyo']
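
        # NOTE: the early return below disables the validation assertions that
        # follow.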
return
|
||||||
|
|
||||||
|
errors = validate_strain(strain)
|
||||||
|
self.assertEqual(len(errors), 10)
|
||||||
|
|
||||||
|
strain.id.collection = 'test'
|
||||||
|
strain.id.number = '1'
|
||||||
|
|
||||||
|
|
||||||
|
errors = validate_strain(strain)
|
||||||
|
self.assertEqual(len(errors), 9)
|
||||||
|
|
||||||
|
strain.nagoya_protocol = NAGOYA_DOCS_AVAILABLE
|
||||||
|
strain.restriction_on_use = NO_RESTRICTION
|
||||||
|
strain.risk_group = 1
|
||||||
|
strain.taxonomy.organism_type = [OrganismType(4)]
|
||||||
|
strain.taxonomy.hybrids = ['Sac lac', 'Sac lcac3']
|
||||||
|
strain.growth.recommended_media = ['aa']
|
||||||
|
strain.growth.recommended_temp = {'min': 2, 'max':5}
|
||||||
|
strain.form_of_supply = ['lyo']
|
||||||
|
strain.collect.location.country = 'ESP'
|
||||||
|
errors = validate_strain(strain)
|
||||||
|
self.assertFalse(errors)
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsolation(unittest.TestCase):
|
||||||
|
def test_iniatialize_isollation(self):
|
||||||
|
isolation = Isolation()
|
||||||
|
self.assertEqual(isolation.dict(), {})
|
||||||
|
isolation.who = "pepito"
|
||||||
|
self.assertTrue(ISOLATED_BY in isolation.dict())
|
||||||
|
isolation.date = DateRange().strpdate("2012----")
|
||||||
|
self.assertTrue(DATE_OF_ISOLATION in isolation.dict())
|
||||||
|
|
||||||
|
try:
|
||||||
|
isolation.location.site = "spain"
|
||||||
|
self.fail()
|
||||||
|
except (ValueError, AttributeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestGenomicSequence(unittest.TestCase):
|
||||||
|
def test_empty_init(self):
|
||||||
|
gen_seq = GenomicSequence()
|
||||||
|
self.assertEqual(gen_seq.dict(), {})
|
||||||
|
gen_seq.marker_id = "pepe"
|
||||||
|
gen_seq.marker_type = "16S rRNA"
|
||||||
|
self.assertEqual(gen_seq.dict(), {
|
||||||
|
"marker_type": "16S rRNA", "INSDC": "pepe"})
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# import sys;sys.argv = ['', 'TestStrain']
|
||||||
|
unittest.main()
|
||||||
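The '1992----' and '2012----' literals in these tests are the specification's padded partial-date form: YYYYMMDD with the unknown fields dashed out. A sketch of that encoding with a hypothetical helper (not the DateRange API itself):

def encode_partial_date(year=None, month=None, day=None):
    # hypothetical helper: pad the absent fields with dashes,
    # e.g. encode_partial_date(1992) -> "1992----"
    year_str = f"{year:04d}" if year else "----"
    month_str = f"{month:02d}" if month else "--"
    day_str = f"{day:02d}" if day else "--"
    return year_str + month_str + day_str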
51
tests/test_parsers.py
Normal file
51
tests/test_parsers.py
Normal file
@ -0,0 +1,51 @@
import unittest
from pathlib import Path
from pprint import pprint

from mirri.entities.strain import ValidationError
from mirri.io.parsers.mirri_excel import parse_mirri_excel

TEST_DATA_DIR = Path(__file__).parent / "data"


class MirriExcelTests(unittest.TestCase):

    def test_mirri_excel_parser(self):
        in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            parsed_data = parse_mirri_excel(fhand, version="20200601")

        medium = parsed_data["growth_media"][0]
        self.assertEqual(medium.acronym, "1")
        self.assertEqual(medium.description, "NUTRIENT BROTH/AGAR I")

        strains = list(parsed_data["strains"])
        strain = strains[0]
        self.assertEqual(strain.publications[0].id, 1)
        self.assertEqual(strain.publications[0].title, 'Cosa')
        self.assertEqual(strain.id.number, "1")
        pprint(strain.dict())  # debug aid: show the first parsed strain

    # the xtest_ prefix keeps these two tests out of unittest discovery
    def xtest_mirri_excel_parser_invalid_fail(self):
        in_path = TEST_DATA_DIR / "invalid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            with self.assertRaises(ValidationError):
                parse_mirri_excel(fhand, version="20200601")

    def xtest_mirri_excel_parser_invalid(self):
        in_path = TEST_DATA_DIR / "invalid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            parsed_data = parse_mirri_excel(fhand, version="20200601")

        errors = parsed_data["errors"]
        for _id, _errors in errors.items():
            print(_id, _errors)


if __name__ == "__main__":
    # import sys;sys.argv = ['',
    #             'MirriExcelTests.test_mirri_excel_parser_invalid']
    unittest.main()
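A note on the two xtest_-prefixed methods above: renaming hides them from unittest discovery silently. The skip decorator, sketched here on a stand-in test, keeps the disabled test visible in the run report instead:

import unittest


class SkippedExample(unittest.TestCase):
    # unittest.skip reports the test as skipped instead of hiding it entirely
    @unittest.skip("invalid.mirri.xlsx fixture not settled yet")
    def test_mirri_excel_parser_invalid_fail(self):
        self.fail("never runs while the skip decorator is in place")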
589
tests/test_validation.py
Normal file
589
tests/test_validation.py
Normal file
@ -0,0 +1,589 @@
from datetime import datetime
import unittest
from pathlib import Path

from mirri.validation.tags import (
    CHOICES,
    COORDINATES,
    CROSSREF,
    CROSSREF_NAME,
    DATE,
    MATCH,
    MISSING,
    MULTIPLE,
    NUMBER,
    REGEXP,
    SEPARATOR,
    TAXON,
    TYPE,
    UNIQUE,
    VALUES,
)

from mirri.validation.excel_validator import (
    is_valid_choices,
    is_valid_coords,
    is_valid_crossrefs,
    is_valid_date,
    is_valid_missing,
    is_valid_number,
    is_valid_regex,
    is_valid_taxon,
    is_valid_unique,
    is_valid_file,
    validate_mirri_excel,
)


TEST_DATA_DIR = Path(__file__).parent / "data"

# keys of the per-case dicts used by the table-driven tests below
TS_VALUE = "value"
TS_CONF = "conf"
TS_ASSERT = "assert_func"

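Every test in this file follows the same table-driven shape: a list of case dicts keyed by TS_VALUE, TS_CONF and TS_ASSERT, walked in a loop under subTest so one failing case does not mask the rest. The pattern in isolation, with a hypothetical is_valid_x standing in for any of the imported checkers:

import unittest


def is_valid_x(value, conf):
    # hypothetical checker, stands in for any is_valid_* function
    return bool(value)


class TableDrivenExample(unittest.TestCase):
    def test_is_valid_x(self):
        tests = [
            {"value": "abc", "conf": {}, "assert_func": self.assertTrue},
            {"value": "", "conf": {}, "assert_func": self.assertFalse},
        ]
        for test in tests:
            # subTest keeps running after a failure and labels it with the value
            with self.subTest(value=test["value"]):
                test["assert_func"](is_valid_x(test["value"], test["conf"]))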
class MirriExcelValidationTests(unittest.TestCase):

    def test_validation_structure(self):
        in_path = TEST_DATA_DIR / "invalid_structure.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        entities = []
        err_codes = []
        for ett, errors in error_log.get_errors().items():
            entities.append(ett)
            err_codes.extend([err.code for err in errors])

        self.assertIn("EFS", entities)
        self.assertIn("STD", entities)
        self.assertIn("GOD", entities)
        self.assertIn("GMD", entities)

        self.assertIn("EFS03", err_codes)
        self.assertIn("EFS06", err_codes)
        self.assertIn("EFS08", err_codes)
        self.assertIn("GOD06", err_codes)
        self.assertIn("GMD01", err_codes)
        self.assertIn("STD05", err_codes)
        self.assertIn("STD08", err_codes)
        self.assertIn("STD12", err_codes)

    def test_validation_content(self):
        in_path = TEST_DATA_DIR / "invalid_content.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        entities = []
        err_codes = []
        for ett, errors in error_log.get_errors().items():
            entities.append(ett)
            err_codes.extend([err.code for err in errors])

        self.assertGreater(len(err_codes), 0)

        self.assertNotIn("EFS", entities)
        self.assertIn("STD", entities)
        self.assertIn("GOD", entities)
        self.assertIn("GID", entities)

        self.assertIn("GOD04", err_codes)
        self.assertIn("GOD07", err_codes)
        self.assertIn("GID03", err_codes)
        self.assertIn("STD11", err_codes)
        self.assertIn("STD15", err_codes)
        self.assertIn("STD22", err_codes)
        self.assertIn("STD04", err_codes)
        self.assertIn("STD10", err_codes)
        self.assertIn("STD07", err_codes)
        self.assertIn("STD14", err_codes)
        self.assertIn("STD16", err_codes)

    def test_validation_valid(self):
        in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)

        self.assertEqual(len(error_log.get_errors()), 0)

class ValidationFunctionsTest(unittest.TestCase):

    def test_is_valid_regex(self):
        tests = [
            {
                TS_VALUE: "abcDEF",
                TS_CONF: {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "123456",
                TS_CONF: {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "123456",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\d+"},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "abcdef",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\d+"},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "abc 123",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "123 abc",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "123 ",
                TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_regex(value, conf))

    def test_is_valid_choices(self):
        tests = [
            {
                TS_VALUE: "1",
                TS_CONF: {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "1, 3",
                TS_CONF: {
                    TYPE: CHOICES,
                    VALUES: ["1", "2", "3", "4"],
                    MULTIPLE: True,
                    SEPARATOR: ","
                },
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "5",
                TS_CONF: {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_choices(value, conf))
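The vectors above pin down the choices contract: a bare value must appear in VALUES, and when MULTIPLE is set the value is split on SEPARATOR and every stripped token must appear. A minimal sketch under those assumptions, with plain string keys standing in for the tag constants; the crossref test that follows is the same idea with the allowed values looked up through crossrefs_pointer:

def is_valid_choices_sketch(value, conf):
    # stand-in keys: "values", "multiple" and "separator" play the role of
    # the VALUES, MULTIPLE and SEPARATOR tag constants
    allowed = conf["values"]
    if conf.get("multiple"):
        tokens = [token.strip() for token in str(value).split(conf["separator"])]
    else:
        tokens = [str(value)]
    return all(token in allowed for token in tokens)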
    def test_is_valid_crossref(self):
        tests = [
            {
                TS_VALUE: "abc",
                TS_CONF: {
                    TYPE: CROSSREF,
                    CROSSREF_NAME: "values",
                    "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
                },
                TS_ASSERT: self.assertTrue,
            },
            {
                TS_VALUE: "123",
                TS_CONF: {
                    TYPE: CROSSREF,
                    CROSSREF_NAME: "values",
                    "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
                },
                TS_ASSERT: self.assertFalse,
            },
            {
                TS_VALUE: "abc, def",
                TS_CONF: {
                    TYPE: CROSSREF,
                    CROSSREF_NAME: "values",
                    "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
                    MULTIPLE: True,
                    SEPARATOR: ",",
                },
                TS_ASSERT: self.assertTrue,
            },
            {
                TS_VALUE: "abc, 123",
                TS_CONF: {
                    TYPE: CROSSREF,
                    CROSSREF_NAME: "values",
                    "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
                    MULTIPLE: True,
                    SEPARATOR: ",",
                },
                TS_ASSERT: self.assertFalse,
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_crossrefs(value, conf))
    def test_is_valid_missing(self):
        tests = [
            {
                TS_VALUE: 1,
                TS_CONF: {TYPE: MISSING},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "abc",
                TS_CONF: {TYPE: MISSING},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: None,
                TS_CONF: {TYPE: MISSING},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_missing(value, conf))
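These vectors only separate None from everything else, so the presence check reduces to a None test. A sketch; whether empty strings also count as missing is not pinned down by the vectors, so the real function may differ:

def is_valid_missing_sketch(value, conf):
    # assumption: any non-None value counts as present (1 and "abc" pass, None fails)
    return value is not None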
    def test_is_valid_date(self):
        tests = [
            {
                TS_VALUE: '2020-04-07',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: '2020/04/07',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: datetime(2021, 5, 1),
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: '2020-05',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: '2020/05',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 2020,
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: '2021 05 01',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: '04-07-2020',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: '2021-02-31',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: '2021-15',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: '15-2021',
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 3000,
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: -2020,
                TS_CONF: {TYPE: DATE},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_date(value, conf))
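Reading the vectors back: ISO-ordered strings (year first, with - or / separators, day optional), datetime objects and bare years are accepted, while space-separated and day-first strings, impossible dates and implausible years (3000, -2020) are rejected. A sketch under those assumptions; the strptime format list and the exact year window are guesses:

from datetime import datetime


def is_valid_date_sketch(value, conf):
    # assumption: bare years pass only inside a plausible window, which
    # would explain why 2020 passes while 3000 and -2020 fail
    if isinstance(value, datetime):
        return True
    if isinstance(value, int):
        return 1000 <= value <= datetime.now().year
    for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m", "%Y/%m"):
        try:
            datetime.strptime(str(value), fmt)
            return True
        except ValueError:
            continue
    return False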
    def test_is_valid_coordinates(self):
        tests = [
            {
                TS_VALUE: "23; 50",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "-90; -100",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "90; 100",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "0; 0",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "10; 20; 5",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "10; 20; -5",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "91; 50",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "87; 182",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "-200; 182",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "20, 40",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: "abc def",
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 123,
                TS_CONF: {TYPE: COORDINATES},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_coords(value, conf))
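The accepted shape is a semicolon-separated "latitude; longitude" pair, optionally with a third altitude component of either sign; comma separators and non-strings fail. A sketch with the usual +/-90 and +/-180 bounds, which match every passing and failing vector above:

def is_valid_coords_sketch(value, conf):
    # assumption: "lat; lon" or "lat; lon; altitude", semicolon-separated
    if not isinstance(value, str):
        return False
    parts = [part.strip() for part in value.split(";")]
    if len(parts) not in (2, 3):
        return False
    try:
        numbers = [float(part) for part in parts]
    except ValueError:
        return False
    lat, lon = numbers[0], numbers[1]
    # the altitude component, when present, is accepted with any sign
    return -90 <= lat <= 90 and -180 <= lon <= 180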
    def test_is_valid_number(self):
        tests = [
            {
                TS_VALUE: 1,
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 2.5,
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "10",
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "10.5",
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 5,
                TS_CONF: {TYPE: NUMBER, "min": 0},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 5,
                TS_CONF: {TYPE: NUMBER, "max": 10},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 5,
                TS_CONF: {TYPE: NUMBER, "min": 0, "max": 10},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "hello",
                TS_CONF: {TYPE: NUMBER},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 10,
                TS_CONF: {TYPE: NUMBER, "max": 5},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 0,
                TS_CONF: {TYPE: NUMBER, "min": 5},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_number(value, conf))
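Numeric strings coerce, and the optional min/max keys bound the result. A minimal sketch consistent with every vector above:

def is_valid_number_sketch(value, conf):
    # coerce numeric strings, then apply the optional min/max bounds
    try:
        number = float(value)
    except (TypeError, ValueError):
        return False
    if "min" in conf and number < conf["min"]:
        return False
    if "max" in conf and number > conf["max"]:
        return False
    return True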
    def test_is_valid_taxon(self):
        tests = [
            {
                TS_VALUE: 'sp. species',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'spp species subsp. subspecies',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'spp species subsp. subspecies var. variety',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'spp taxon',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'Candidaceae',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: 'sp sp species',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertFalse
            },
            {
                TS_VALUE: 'spp species abc. def',
                TS_CONF: {TYPE: TAXON},
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_taxon(value, conf))
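A guess at the grammar these vectors encode: either a bare name ("Candidaceae") or rank/epithet pairs whose rank markers come from a fixed list (sp., spp, subsp., var.). The sketch below reproduces the vectors, but the marker list itself is an assumption:

ALLOWED_RANKS = {"sp.", "spp", "subsp.", "var."}  # assumed marker list


def is_valid_taxon_sketch(value, conf):
    # assumption: either a single bare name, or alternating rank/epithet
    # pairs such as "spp species subsp. subspecies var. variety"
    tokens = str(value).split()
    if len(tokens) == 1:
        return True
    if len(tokens) % 2 != 0:
        return False
    pairs = zip(tokens[0::2], tokens[1::2])
    return all(rank in ALLOWED_RANKS for rank, _epithet in pairs)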
    def test_is_valid_unique(self):
        tests = [
            {
                TS_VALUE: "abc",
                TS_CONF: {
                    TYPE: UNIQUE,
                    "label": "values",
                    "shown_values": {}
                },
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "jkl",
                TS_CONF: {
                    TYPE: UNIQUE,
                    "label": "values",
                    "shown_values": {
                        "values": {"abc": '',
                                   "def": '',
                                   "ghi": ''},
                    }
                },
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: "abc",
                TS_CONF: {
                    TYPE: UNIQUE,
                    "label": "values",
                    "shown_values": {
                        "values": {"abc": '',
                                   "def": '',
                                   "ghi": ''},
                    }
                },
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            conf = test[TS_CONF]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_unique(value, conf))
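Uniqueness is checked against conf's shown_values registry, keyed by label: a value passes if it has not already been seen under that label. A sketch; the real function presumably also records the new value as a side effect, which the vectors do not show:

def is_valid_unique_sketch(value, conf):
    # assumption: shown_values maps each label to the values already seen;
    # a missing label means nothing has been seen yet
    seen = conf["shown_values"].get(conf["label"], {})
    return value not in seen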
    def test_is_valid_file(self):
        tests = [
            {
                TS_VALUE: TEST_DATA_DIR / "invalid_structure.mirri.xlsx",
                TS_ASSERT: self.assertTrue
            },
            {
                TS_VALUE: TEST_DATA_DIR / "invalid_excel.mirri.json",
                TS_ASSERT: self.assertFalse
            },
        ]

        for test in tests:
            value = test[TS_VALUE]
            assert_func = test[TS_ASSERT]
            with self.subTest(value=value):
                assert_func(is_valid_file(value))


if __name__ == "__main__":
    # import sys;sys.argv = ['',
    #             'ValidationFunctionsTest.test_is_valid_regex']
    unittest.main()
24
tests/test_writers.py
Normal file
24
tests/test_writers.py
Normal file
@ -0,0 +1,24 @@
import unittest
from pathlib import Path

from mirri.io.writers.mirri_excel import write_mirri_excel
from mirri.io.parsers.mirri_excel import parse_mirri_excel

TEST_DATA_DIR = Path(__file__).parent / "data"


class MirriExcelTests(unittest.TestCase):
    def test_valid_excel(self):
        in_path = TEST_DATA_DIR / "valid.mirri.full.xlsx"
        with in_path.open("rb") as fhand:  # close the handle instead of leaking it
            parsed_data = parse_mirri_excel(fhand, version="20200601")
        strains = parsed_data["strains"]
        growth_media = parsed_data["growth_media"]
        out_path = Path("/tmp/test.xlsx")

        write_mirri_excel(out_path, strains, growth_media, version="20200601")


if __name__ == "__main__":
    # import sys;sys.argv = ['',
    #             'BiolomicsWriter.test_mirri_excel_parser_invalid']
    unittest.main()
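The test above only checks that writing does not raise; a round-trip assertion would also catch silent data loss. A sketch, assuming parse_mirri_excel accepts the file just written, that strains is a list, and that strains compare meaningfully through their dict() form:

import tempfile
from pathlib import Path

from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.io.writers.mirri_excel import write_mirri_excel


def round_trip(strains, growth_media, version="20200601"):
    # write to a temporary workbook, parse it back, and compare the strains
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "round_trip.xlsx"
        write_mirri_excel(out_path, strains, growth_media, version=version)
        with out_path.open("rb") as fhand:
            reparsed = parse_mirri_excel(fhand, version=version)
    original = [strain.dict() for strain in strains]
    recovered = [strain.dict() for strain in reparsed["strains"]]
    assert original == recovered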