First import

This commit is contained in:
Jose Miguel López Coronado 2022-02-18 12:09:05 +01:00
commit 332876f58c
73 changed files with 12572 additions and 0 deletions

19
README.md Normal file
View File

@ -0,0 +1,19 @@
# MIRRI Utils
## Installation
> pip install path_to_package.tar.gz
## Description
A small set of utilities to deal with MIRRI data.
- A data class to deal with strain data.
- An Excel reader for the MIRRI specification.
- An Excel validator for the MIRRI specification.
- An Excel writer to create the Excel file with the MIRRI specifications.

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python3
import argparse
import sys
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS, STRAIN_WS
SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
def get_cmd_args():
    """Parse the command line and return the settings as a dict.

    Returns a dict with keys: accession_number, user, password,
    client_id and client_secret.
    """
    # BUG FIX: the description previously read "Upload strains to MIRRI-IS",
    # which did not match this script (it removes duplicated strain records).
    desc = "Delete the duplicated strains in MIRRI-IS for an accession number"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-a', '--accession_number', required=True,
                        help='Delete the duplicated items in database for the given accession number')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    args = parser.parse_args()
    return {'accession_number': args.accession_number, 'user': args.ws_user,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret}
def write_errors_in_screen(errors, fhand=sys.stderr):
    """Write the validation errors to *fhand*, grouped and underlined by key."""
    for section, section_errors in errors.items():
        fhand.write(f'{section}\n')
        fhand.write('-' * len(section))
        fhand.write('\n')
        for err in section_errors:
            prefix = f'{err.pk}: ' if err.pk else ''
            fhand.write(f'{prefix}{err.message} - {err.code}\n')
        fhand.write('\n')
def main():
    """Entry point: locate a strain by accession number and delete all but
    one of its duplicated records.

    Exits with status 0 (and a message) when the accession is absent or
    not duplicated.
    """
    args = get_cmd_args()
    out_fhand = sys.stdout
    client = BiolomicsMirriClient(server_url=SERVER_URL, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'])
    query = {"Query": [{"Index": 0,
                        "FieldName": "Collection accession number",
                        "Operation": "TextExactMatch",
                        "Value": args['accession_number']}],
             "Expression": "Q0",
             "DisplayStart": 0,
             "DisplayLength": 10}
    result = client.search(STRAIN_WS, query=query)
    total = result["total"]
    if total == 0:
        out_fhand.write('Accession not in database\n')
        # BUG FIX: removed an unreachable "return None" that followed this
        # sys.exit(0) call.
        sys.exit(0)
    elif total == 1:
        out_fhand.write('Accession is not duplicated\n')
        sys.exit(0)
    print(f'Duplicates found: {total}. removing duplicates')
    # Keep the last record returned by the search and delete the rest.
    duplicated_ids = [record.record_id for record in result['records']]
    for duplicated_id in duplicated_ids[:-1]:
        client.delete_by_id(STRAIN_WS, duplicated_id)


if __name__ == '__main__':
    main()

91
bin/delete_mirri_data.py Normal file
View File

@ -0,0 +1,91 @@
#!/usr/bin/env python3
import argparse
import sys
from mirri.biolomics.pipelines.strain import retrieve_strain_by_accession_number
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS, STRAIN_WS
from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.validation.excel_validator import validate_mirri_excel
SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
def get_cmd_args():
    """Parse the command line and return the settings as a dict.

    Returns a dict with keys: input_fhand, user, version, password,
    client_id, client_secret and update.
    """
    # BUG FIX: the description previously read "Upload strains to MIRRI-IS",
    # which did not match this script (it deletes growth media and strains).
    desc = "Delete the strains and growth media of an Excel file from MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', help='Validated Excel file',
                        type=argparse.FileType('rb'), required=True)
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('-f', '--force_update', required=False,
                        action='store_true',
                        help='Use it if you want to update the existing strains')
    args = parser.parse_args()
    return {'input_fhand': args.input, 'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret, 'update': args.force_update}
def write_errors_in_screen(errors, fhand=sys.stderr):
    """Report validation errors to *fhand*, one underlined section per key."""
    for key in errors:
        fhand.write(f'{key}\n')
        underline = '-' * len(key)
        fhand.write(underline + '\n')
        for error in errors[key]:
            if error.pk:
                fhand.write(f'{error.pk}: ')
            fhand.write(f'{error.message} - {error.code}\n')
        fhand.write('\n')
def main():
    """Validate the given Excel file, then delete its growth media and
    strains from the MIRRI-IS test server.

    Exits with status 1 (printing the errors) when validation fails.
    """
    args = get_cmd_args()
    input_fhand = args['input_fhand']
    spec_version = args['version']
    out_fhand = sys.stderr
    # Validate first: the database is not touched if the file has errors.
    error_log = validate_mirri_excel(input_fhand, version=spec_version)
    errors = error_log.get_errors()
    if errors:
        write_errors_in_screen(errors, out_fhand)
        sys.exit(1)
    # The validator consumed the file handle; rewind before parsing.
    input_fhand.seek(0)
    parsed_objects = parse_mirri_excel(input_fhand, version=spec_version)
    strains = list(parsed_objects['strains'])
    growth_media = list(parsed_objects['growth_media'])
    client = BiolomicsMirriClient(server_url=SERVER_URL, api_version= 'v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'])
    for gm in growth_media:
        try:
            client.delete_by_name(GROWTH_MEDIUM_WS, gm.acronym)
        except ValueError as error:
            # Best effort: report growth media that could not be deleted
            # (e.g. not present in the database) and keep going.
            print(error)
            continue
        print(f'Growth medium {gm.acronym} deleted')
    for strain in strains:
        ws_strain = retrieve_strain_by_accession_number(client, strain.id.strain_id)
        if ws_strain is not None:
            client.delete_by_id(STRAIN_WS, ws_strain.record_id)
            print(f'Strain {strain.id.strain_id} deleted')
        else:
            print(f'Strain {strain.id.strain_id} not in database')


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,182 @@
#!/usr/bin/env python3
import argparse
import sys
from collections import Counter
from mirri.biolomics.pipelines.growth_medium import get_or_create_or_update_growth_medium
from mirri.biolomics.pipelines.strain import get_or_create_or_update_strain
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.validation.excel_validator import validate_mirri_excel
TEST_SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
PROD_SERVER_URL = 'https://webservices.bio-aware.com/mirri'
def get_cmd_args():
    """Parse the command line and return the settings as a dict.

    Returns a dict with keys: input_fhand, user, version, password,
    client_id, client_secret, update, verbose, use_production_server,
    add_gm, add_strains and skip_first_num.
    """
    desc = "Upload strains to MIRRI-IS"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', help='Validated Excel file',
                        type=argparse.FileType('rb'), required=True)
    parser.add_argument('-v', '--spec_version', default='20200601',
                        help='Version of the specification of the given excel file')
    parser.add_argument('-u', '--ws_user', help='Username of the web service',
                        required=True)
    parser.add_argument('-p', '--ws_password', required=True,
                        help='Password of the web service user')
    parser.add_argument('-c', '--client_id', required=True,
                        help='Client id of the web service')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Client secret of the web service')
    parser.add_argument('--force_update', required=False,
                        action='store_true',
                        help='Use it if you want to update the existing strains')
    parser.add_argument('--verbose', action='store_true',
                        help='use it if you want a verbose output')
    parser.add_argument('--prod', action='store_true',
                        help='Use production server')
    parser.add_argument('--dont_add_gm', action='store_false',
                        help="Don't add growth media", default=True)
    # BUG FIX: the help text was a copy-paste of the growth-media option.
    parser.add_argument('--dont_add_strains', action='store_false',
                        help="Don't add strains", default=True)
    parser.add_argument('--skip_first_num', type=int,
                        help='skip first X strains to the tool')
    args = parser.parse_args()
    return {'input_fhand': args.input, 'user': args.ws_user,
            'version': args.spec_version,
            'password': args.ws_password, 'client_id': args.client_id,
            'client_secret': args.client_secret, 'update': args.force_update,
            'verbose': args.verbose, 'use_production_server': args.prod,
            'add_gm': args.dont_add_gm, 'add_strains': args.dont_add_strains,
            'skip_first_num': args.skip_first_num}
def write_errors_in_screen(errors, fhand=sys.stderr):
    """Dump validation errors to *fhand*, grouped by error key."""
    for heading, entries in errors.items():
        fhand.write(f'{heading}\n')
        fhand.write('-' * len(heading) + '\n')
        for entry in entries:
            lead = f'{entry.pk}: ' if entry.pk else ''
            fhand.write(lead + f'{entry.message} - {entry.code}\n')
        fhand.write('\n')
def create_or_upload_strains(client, strains, update=False, counter=None,
                             out_fhand=None, seek=None):
    """Create or update every strain through the web service client.

    Strains with an index lower than *seek* are skipped.  Outcomes
    ('created'/'updated'/'not modified') are tallied in *counter* and
    reported line by line to *out_fhand* when those are given.
    """
    for index, strain in enumerate(strains):
        if seek is not None and index < seek:
            continue
        outcome = get_or_create_or_update_strain(client, strain, update=update)
        record = outcome['record']
        if outcome.get('updated', False):
            state = 'updated'
        elif outcome['created']:
            state = 'created'
        else:
            state = 'not modified'
        if counter is not None:
            counter[state] += 1
        if out_fhand is not None:
            out_fhand.write(f'{index}: Strain {record.id.strain_id}: {state}\n')
def create_or_upload_growth_media(client, growth_media, update=False, counter=None,
                                  out_fhand=None):
    """Create or update every growth medium through the web service client.

    Outcomes ('created'/'updated'/'not modified') are tallied in *counter*
    and reported to *out_fhand* when those are given.
    """
    for gm in growth_media:
        outcome = get_or_create_or_update_growth_medium(client, gm, update)
        record = outcome['record']
        if outcome.get('updated', False):
            state = 'updated'
        elif outcome['created']:
            state = 'created'
        else:
            state = 'not modified'
        if counter is not None:
            counter[state] += 1
        if out_fhand is not None:
            out_fhand.write(f'Growth medium {record.record_name}: {state}\n')
def main():
    """Validate the Excel file and upload its growth media and strains to
    the MIRRI-IS web service.

    Exits with status 1 (printing the errors) when validation fails.  Each
    upload phase runs inside a client-side "transaction" so that created
    records can be rolled back on failure.
    """
    args = get_cmd_args()
    input_fhand = args['input_fhand']
    spec_version = args['version']
    out_fhand = sys.stdout
    # Validate first: the database is not touched if the file has errors.
    error_log = validate_mirri_excel(input_fhand, version=spec_version)
    errors = error_log.get_errors()
    skip_first_num = args['skip_first_num']
    if errors:
        write_errors_in_screen(errors, out_fhand)
        sys.exit(1)
    # The validator consumed the file handle; rewind before parsing.
    input_fhand.seek(0)
    parsed_objects = parse_mirri_excel(input_fhand, version=spec_version)
    strains = list(parsed_objects['strains'])
    growth_media = list(parsed_objects['growth_media'])
    server_url = PROD_SERVER_URL if args['use_production_server'] else TEST_SERVER_URL
    client = BiolomicsMirriClient(server_url=server_url, api_version='v2',
                                  client_id=args['client_id'],
                                  client_secret=args['client_secret'],
                                  username=args['user'],
                                  password=args['password'],
                                  verbose=args['verbose'])
    if args['add_gm']:
        client.start_transaction()
        counter = Counter()
        try:
            create_or_upload_growth_media(client, growth_media, update=args['update'],
                                          counter=counter, out_fhand=out_fhand)
        except (Exception, KeyboardInterrupt) as error:
            out_fhand.write('There were some errors in the Growth media upload\n')
            out_fhand.write(str(error) + '\n')
            out_fhand.write('Rolling back\n')
            client.rollback()
            raise
        client.finish_transaction()
        show_stats(counter, 'Growth Media', out_fhand)
    if args['add_strains']:
        client.start_transaction()
        counter = Counter()
        try:
            create_or_upload_strains(client, strains, update=args['update'],
                                     counter=counter,
                                     out_fhand=out_fhand, seek=skip_first_num)
        except (Exception, KeyboardInterrupt) as error:
            out_fhand.write('There were some errors in the Strain upload\n')
            out_fhand.write(str(error) + '\n')
            out_fhand.write('rolling back\n')
            # NOTE(review): unlike the growth-media branch, the rollback call
            # here was commented out in the original, so records created
            # before the failure are left in the database — confirm whether
            # that is intentional.
            # client.rollback()
            raise
        # BUG FIX: finish_transaction() was previously called both inside the
        # try block and here, running twice on success; it is now called once,
        # mirroring the growth-media branch above.
        client.finish_transaction()
        show_stats(counter, 'Strains', out_fhand)
def show_stats(counter, kind, out_fhand):
    """Write an underlined *kind* heading followed by the (up to five) most
    common counts in *counter*."""
    underline = '-' * len(kind)
    out_fhand.write(f'{kind}\n{underline}\n')
    for state, count in counter.most_common(5):
        out_fhand.write(f'{state}: {count}\n')
    out_fhand.write('\n')


if __name__ == '__main__':
    main()

19
bin/validate.py Normal file
View File

@ -0,0 +1,19 @@
#!/usr/bin/env python
import sys
from pathlib import Path
from mirri.validation.excel_validator import validate_mirri_excel
import warnings
warnings.simplefilter("ignore")
def main():
    """Validate the MIRRI Excel file given as the first CLI argument and
    print every validation error found (pk, message and code)."""
    path = Path(sys.argv[1])
    error_log = validate_mirri_excel(path.open("rb"))
    for errors in error_log.get_errors().values():
        for error in errors:
            print(error.pk, error.message, error.code)


if __name__ == "__main__":
    main()

Binary file not shown.

Binary file not shown.

61
mirri/TODO.txt Normal file
View File

@ -0,0 +1,61 @@
Ontobiotope term. just one field in dataset, two fields in biolomics
Altitude. Field and in Coordinates
Geographic origin: field and Entry in other table
Ploidy: How is this field formatted? haploid/diploid or 1, 2, 3...
Best strategy:
My class has
- strain data
- geographic data
- literature
- sequences
No not a valid value for Strain from a registered collection, Allowed values: ?. no. yes
yes not a valid value for GMO, Allowed values: ?. No. Yes
Organism Type:
firstuppercase in deposit
lower case in retrieve
Taxon name is a list in retrieve
null values:
'Comment on taxonomy' = '' could be null
'Coordinates of geographic origin':{Longitude, lati... 'NaN' could be null
'Date of inclusion in the catalogue' = '' could be null
'Enzyme production'= '' could be null
'Ploidy':'?' could be null
Deposit date
--------------------------------------------
- Assign seq to strain in strain serializers
- Fields in ws that are not in our specification. What to do with them
- Type description - IGNORE
- Associated documents - IGNORE
- Data provided by - IGNORE
- Orders - IGNORE
- MTA text - IGNORE
- Catalog URL -
- Publication RecordName assignation. How to do it?
- Sequence RecordName assignation. How to do it
- Publications serializer improvement
------------------------------------------------------
Marker Name. Which options are allowed in WS and how they map to the types in specifications?
update: it should be done in the detail url.
interspecific_hybrid set to "no" by default in web service if no value given.
Tested temperature growth range {'max': 0.0, 'min': 0.0} when added empty
Very slow: A normal search action takes

21
mirri/__init__.py Normal file
View File

@ -0,0 +1,21 @@
import functools
def rgetattr(obj, attr, *args):
    """Recursive getattr: resolve a dotted attribute path (e.g. "a.b.c").

    Extra positional *args* behave like getattr's optional default at
    every step of the path.
    """
    target = obj
    for part in attr.split('.'):
        target = getattr(target, part, *args)
    return target
def rsetattr(obj, attr, val):
    """Recursive setattr: set the attribute named by a dotted path on the
    object resolved by everything before the final dot."""
    parent_path, _, leaf = attr.rpartition('.')
    target = rgetattr(obj, parent_path) if parent_path else obj
    return setattr(target, leaf, val)
# using wonder's beautiful simplification:
# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427
class ValidationError(Exception):
    """Raised when data does not conform to the expected format."""
    pass

View File

View File

View File

@ -0,0 +1,44 @@
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS
from mirri.entities.growth_medium import GrowthMedium
from mirri.biolomics.serializers.growth_media import get_growth_medium_record_name
def get_or_create_or_update_growth_medium(client: BiolomicsMirriClient,
                                          growth_medium: GrowthMedium,
                                          update=False):
    """Fetch the growth medium from the WS, creating it when missing and,
    when *update* is True, pushing an update if the records differ.

    Returns a dict: {'record': ..., 'created': bool, 'updated': bool}.
    """
    found = get_or_create_growth_medium(client, growth_medium)
    remote_gm = found['record']
    if found['created']:
        return {'record': remote_gm, 'created': True, 'updated': False}
    if not update:
        return {'record': remote_gm, 'created': False, 'updated': False}
    # Identity fields never match a freshly parsed record, so they are
    # excluded from the comparison.
    same = growth_medium.is_equal(remote_gm,
                                  exclude_fields=['record_id', 'record_name',
                                                  'acronym'])
    if same:
        return {'record': remote_gm, 'created': False, 'updated': False}
    # Copy the remote identity into the local record before pushing it.
    growth_medium.update(remote_gm, include_fields=['record_id', 'record_name'])
    updated_gm = client.update(GROWTH_MEDIUM_WS, growth_medium)
    return {'record': updated_gm, 'created': False, 'updated': True}
def get_or_create_growth_medium(client: BiolomicsMirriClient,
                                growth_medium: GrowthMedium):
    """Return the WS growth medium record, creating it when it is absent.

    Returns a dict: {'record': ..., 'created': bool}.
    """
    name = get_growth_medium_record_name(growth_medium)
    existing = client.retrieve_by_name(GROWTH_MEDIUM_WS, name)
    if existing is None:
        created = client.create(GROWTH_MEDIUM_WS, growth_medium)
        return {'record': created, 'created': True}
    return {'record': existing, 'created': False}

View File

@ -0,0 +1,122 @@
from pprint import pprint
import deepdiff
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS, SEQUENCE_WS, STRAIN_WS
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.publication import Publication
def retrieve_strain_by_accession_number(client, accession_number):
    """Search the strain WS for an exact accession number match.

    Returns None when nothing matches, the single record when exactly one
    matches, and raises ValueError when the number is ambiguous.
    """
    query = {"Query": [{"Index": 0,
                        "FieldName": "Collection accession number",
                        "Operation": "TextExactMatch",
                        "Value": accession_number}],
             "Expression": "Q0",
             "DisplayStart": 0,
             "DisplayLength": 10}
    result = client.search(STRAIN_WS, query=query)
    hits = result["total"]
    if hits == 0:
        return None
    if hits == 1:
        return result["records"][0]
    raise ValueError(f"More than one entries for {accession_number} in database")
def get_or_create_publication(client: BiolomicsMirriClient, pub: Publication):
    """Return the WS bibliography record for *pub* (looked up by title),
    creating it when it is absent.

    Returns a dict: {'record': ..., 'created': bool}.
    """
    existing = client.retrieve_by_name(BIBLIOGRAPHY_WS, pub.title)
    if existing is None:
        created = client.create(BIBLIOGRAPHY_WS, pub)
        return {'record': created, 'created': True}
    return {'record': existing, 'created': False}
def get_or_create_sequence(client: BiolomicsMirriClient, sequence: GenomicSequenceBiolomics):
    """Return the WS sequence record for *sequence* (looked up by marker id),
    creating it when it is absent.

    Returns a dict: {'record': ..., 'created': bool}.
    """
    existing = client.retrieve_by_name(SEQUENCE_WS, sequence.marker_id)
    if existing is None:
        created = client.create(SEQUENCE_WS, sequence)
        return {'record': created, 'created': True}
    return {'record': existing, 'created': False}
def get_or_create_or_update_strain(client: BiolomicsMirriClient,
                                   record: StrainMirri, update=False):
    """Fetch/create the strain in the WS and, when *update* is True, push
    an update if the local record differs from the remote one.

    Returns a dict: {'record': ..., 'created': bool, 'updated': bool}.
    """
    response = get_or_create_strain(client, record)
    new_record = response['record']
    created = response['created']
    if created:
        return {'record': new_record, 'created': True, 'updated': False}
    if not update:
        return {'record': new_record, 'created': False, 'updated': False}
    # Fill in WS identity fields missing from the local record so the diff
    # below does not report them as changes.
    if record.record_id is None:
        record.record_id = new_record.record_id
    if record.record_name is None:
        record.record_name = new_record.record_name
    if record.synonyms is None or record.synonyms == []:
        record.synonyms = new_record.synonyms
    # Compare local vs remote.  Publication/marker record ids are internal
    # WS references that may legitimately differ, so they are excluded.
    diffs = deepdiff.DeepDiff(new_record.dict(), record.dict(),
                              ignore_order=True, exclude_paths=None,
                              exclude_regex_paths=[
                                  r"root\[\'publications\'\]\[\d+\]\[\'id\'\]",
                                  r"root\[\'publications\'\]\[\d+\]\[\'RecordId\'\]",
                                  r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordId\'\]",
                                  r"root\[\'genetics\'\]\[\'Markers\'\]\[\d+\]\[\'RecordName\'\]"])
    if diffs:
        # Shown for operator inspection of what will be updated.
        pprint(diffs, width=200)
    # Idiom fix: was "True if diffs else False"; dead commented-out debug
    # prints removed.
    records_are_different = bool(diffs)
    if records_are_different:
        updated_record = update_strain(client, record)
        updated = True
    else:
        updated_record = record
        updated = False
    return {'record': updated_record, 'created': False, 'updated': updated}
def get_or_create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    """Return the WS strain record (looked up by accession number),
    creating it when it is absent.

    Returns a dict: {'record': ..., 'created': bool}.
    """
    existing = retrieve_strain_by_accession_number(client, strain.id.strain_id)
    if existing is None:
        created = create_strain(client, strain)
        return {'record': created, 'created': True}
    return {'record': existing, 'created': False}
def create_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    """Create the strain in the WS, first making sure its publications and
    sequence markers exist so the strain record can reference them.

    Returns the newly created WS strain record.
    """
    # Idiom fix: the per-item results were assigned to an unused local
    # ("creation_response"); only the side effect of ensuring existence
    # matters here.
    for pub in strain.publications:
        get_or_create_publication(client, pub)
    for marker in strain.genetics.markers:
        get_or_create_sequence(client, marker)
    new_strain = client.create(STRAIN_WS, strain)
    return new_strain
def update_strain(client: BiolomicsMirriClient, strain: StrainMirri):
    """Update the strain in the WS, first making sure its publications and
    sequence markers exist so the strain record can reference them.

    Returns the updated WS strain record.
    """
    # Idiom fix: the per-item results were assigned to an unused local
    # ("creation_response"); only the side effect of ensuring existence
    # matters here.
    for pub in strain.publications:
        get_or_create_publication(client, pub)
    for marker in strain.genetics.markers:
        get_or_create_sequence(client, marker)
    new_strain = client.update(STRAIN_WS, strain)
    return new_strain

View File

View File

@ -0,0 +1,210 @@
from mirri.biolomics.remote.endoint_names import (SEQUENCE_WS, STRAIN_WS,
GROWTH_MEDIUM_WS, TAXONOMY_WS,
COUNTRY_WS, ONTOBIOTOPE_WS,
BIBLIOGRAPHY_WS)
from mirri.biolomics.remote.rest_client import BiolomicsClient
from mirri.biolomics.serializers.sequence import (
serialize_to_biolomics as sequence_to_biolomics,
serialize_from_biolomics as sequence_from_biolomics)
from mirri.biolomics.serializers.strain import (
serialize_to_biolomics as strain_to_biolomics,
serialize_from_biolomics as strain_from_biolomics)
from mirri.biolomics.serializers.growth_media import (
serialize_to_biolomics as growth_medium_to_biolomics,
serialize_from_biolomics as growth_medium_from_biolomics)
from mirri.biolomics.serializers.taxonomy import (
serialize_from_biolomics as taxonomy_from_biolomics)
from mirri.biolomics.serializers.locality import (
serialize_from_biolomics as country_from_biolomics)
from mirri.biolomics.serializers.ontobiotope import (
serialize_from_biolomics as ontobiotope_from_biolomics)
from mirri.biolomics.serializers.bibliography import (
serializer_from_biolomics as bibliography_from_biolomics,
serializer_to_biolomics as bibliography_to_biolomics
)
from pprint import pprint
class BiolomicsMirriClient:
    """High level, entity-oriented client for the MIRRI Biolomics WS.

    Wraps the low level ``BiolomicsClient`` and (de)serializes entities
    using the per-entity configuration in ``_conf``.  A lightweight
    client-side "transaction" records the ids of created records so they
    can be deleted again by ``rollback()``.
    """
    # Per-entity configuration: serializer pair ('to' for payloads sent to
    # the WS, 'from' for responses) and the WS endpoint name.  Entities
    # without a 'to' serializer are read-only through this client.
    _conf = {
        SEQUENCE_WS: {
            'serializers': {'to': sequence_to_biolomics,
                            'from': sequence_from_biolomics},
            'endpoint': 'WS Sequences'},
        STRAIN_WS: {
            'serializers': {'to': strain_to_biolomics,
                            'from': strain_from_biolomics},
            'endpoint': 'WS Strains'},
        GROWTH_MEDIUM_WS: {
            'serializers': {'from': growth_medium_from_biolomics,
                            'to': growth_medium_to_biolomics},
            'endpoint': 'WS Growth media'},
        TAXONOMY_WS: {
            'serializers': {'from': taxonomy_from_biolomics},
            'endpoint': 'WS Taxonomy'},
        COUNTRY_WS: {
            'serializers': {'from': country_from_biolomics},
            'endpoint': 'WS Locality'},
        ONTOBIOTOPE_WS: {
            'serializers': {'from': ontobiotope_from_biolomics},
            'endpoint': 'WS Ontobiotope'},
        BIBLIOGRAPHY_WS: {
            'serializers': {'from': bibliography_from_biolomics,
                            'to': bibliography_to_biolomics},
            'endpoint': 'WS Bibliography'
        }
    }

    def __init__(self, server_url, api_version, client_id, client_secret, username,
                 password, website_id=1, verbose=False):
        _client = BiolomicsClient(server_url, api_version, client_id,
                                  client_secret, username, password,
                                  website_id=website_id, verbose=verbose)
        self.client = _client
        self.schemas = self.client.get_schemas()
        self.allowed_fields = self.client.allowed_fields
        # Transaction bookkeeping: list of (endpoint, record_id) tuples of
        # records created while a transaction is open; None when inactive.
        self._transaction_created_ids = None
        self._in_transaction = False
        self._verbose = verbose

    def _initialize_transaction_storage(self):
        if self._in_transaction:
            msg = 'Can not initialize transaction if already in a transaction'
            raise RuntimeError(msg)
        self._transaction_created_ids = []

    def _add_created_to_transaction_storage(self, response, entity_name):
        # Remember a newly created record id so rollback() can delete it.
        if not self._in_transaction:
            msg = 'Can not add ids to transaction storage if not in a transaction'
            raise RuntimeError(msg)
        id_ = response.json().get('RecordId', None)
        if id_ is not None:
            ws_endpoint_name = self._conf[entity_name]['endpoint']
            # Insert at the front so rollback deletes newest-first.
            self._transaction_created_ids.insert(0, (ws_endpoint_name, id_))

    def start_transaction(self):
        """Begin recording created record ids for a possible rollback."""
        self._initialize_transaction_storage()
        self._in_transaction = True

    def finish_transaction(self):
        """Commit: stop recording and forget the created ids."""
        self._in_transaction = False
        self._transaction_created_ids = None

    def get_endpoint(self, entity_name):
        """Return the WS endpoint name configured for *entity_name*."""
        return self._conf[entity_name]['endpoint']

    def get_serializers_to(self, entity_name):
        """Return the entity -> WS payload serializer for *entity_name*."""
        return self._conf[entity_name]['serializers']['to']

    def get_serializers_from(self, entity_name):
        """Return the WS payload -> entity serializer for *entity_name*."""
        return self._conf[entity_name]['serializers']['from']

    def retrieve_by_name(self, entity_name, name):
        """Return the deserialized entity named *name*, or None when the WS
        answers 404 or with an empty body; raises ValueError otherwise."""
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.find_by_name(endpoint, name=name)
        if response.status_code == 404:
            return None
        elif response.status_code != 200:
            raise ValueError(f"{response.status_code}: {response.text}")
        ws_entity = response.json()
        return None if ws_entity is None else serializer_from(ws_entity,
                                                              client=self)

    def retrieve_by_id(self, entity_name, _id):
        """Return the deserialized entity with record id *_id*, or None on
        404; raises ValueError on any other non-200 status."""
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.retrieve(endpoint, record_id=_id)
        if response.status_code == 404:
            return None
        elif response.status_code != 200:
            raise ValueError(f"{response.status_code}: {response.text}")
        ws_entity = response.json()
        return serializer_from(ws_entity, client=self)

    def create(self, entity_name, entity):
        """Serialize and create *entity* in the WS; returns the deserialized
        response.  Created ids are tracked when a transaction is open."""
        endpoint = self.get_endpoint(entity_name)
        serializer_to = self.get_serializers_to(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        data = serializer_to(entity, client=self)
        response = self.client.create(endpoint, data=data)
        if response.status_code == 200:
            if self._in_transaction:
                self._add_created_to_transaction_storage(response, entity_name)
            return serializer_from(response.json(), client=self)
        else:
            msg = f"return_code: {response.status_code}. msg: {response.json()['errors']['Value']}"
            raise RuntimeError(msg)

    def delete_by_id(self, entity_name, record_id):
        """Delete the record with *record_id*; raises RuntimeError on any
        non-200 status."""
        endpoint = self.get_endpoint(entity_name)
        response = self.client.delete(endpoint, record_id=record_id)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)

    def delete_by_name(self, entity_name, record_name):
        """Look up the record id for *record_name* and delete that record.

        Raises ValueError when the name does not exist and RuntimeError
        when the lookup itself fails.
        """
        endpoint = self.get_endpoint(entity_name)
        response = self.client.find_by_name(endpoint, record_name)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)
        try:
            record_id = response.json()['RecordId']
        except TypeError:
            # A 200 with a non-dict body means the name was not found.
            raise ValueError(f'The given record_name {record_name} does not exists')
        self.delete_by_id(entity_name, record_id=record_id)

    def search(self, entity_name, query):
        """Run a WS search *query* and return
        {'total': int, 'records': [deserialized entities]}."""
        endpoint = self.get_endpoint(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        response = self.client.search(endpoint, search_query=query)
        if response.status_code != 200:
            error = response.json()
            raise RuntimeError(error)
        search_result = response.json()
        result = {'total': search_result['TotalCount'],
                  'records': [serializer_from(record, client=self)
                              for record in search_result['Records']]}
        return result

    def update(self, entity_name, entity):
        """Serialize and update *entity* (which must carry a record_id) in
        the WS; returns the deserialized response."""
        record_id = entity.record_id
        if record_id is None:
            msg = 'In order to update the record, you need the recordId in the entity'
            raise ValueError(msg)
        endpoint = self.get_endpoint(entity_name)
        serializer_to = self.get_serializers_to(entity_name)
        serializer_from = self.get_serializers_from(entity_name)
        data = serializer_to(entity, client=self, update=True)
        response = self.client.update(endpoint, record_id=record_id, data=data)
        if response.status_code == 200:
            entity = serializer_from(response.json(), client=self)
            return entity
        else:
            msg = f"return_code: {response.status_code}. msg: {response.text}"
            raise RuntimeError(msg)

    def rollback(self):
        """Abort the open transaction, deleting every record created in it.

        NOTE(review): delegates to BiolomicsClient.rollback, which is not
        visible in this file — confirm it exists and deletes the given
        (endpoint, id) pairs.
        """
        self._in_transaction = False
        self.client.rollback(self._transaction_created_ids)
        self._transaction_created_ids = None

View File

@ -0,0 +1,7 @@
# Symbolic names for the Biolomics web service entity types.  They are used
# as keys into the client configuration (serializers and endpoint paths).
SEQUENCE_WS = 'sequence'
STRAIN_WS = 'strain'
GROWTH_MEDIUM_WS = 'growth_medium'
TAXONOMY_WS = 'taxonomy'
COUNTRY_WS = 'country'
ONTOBIOTOPE_WS = 'ontobiotope'
BIBLIOGRAPHY_WS = 'bibliography'

View File

@ -0,0 +1,214 @@
import time
import re
import sys
import requests
from requests_oauthlib import OAuth2Session
from oauthlib.oauth2 import LegacyApplicationClient
from oauthlib.oauth2.rfc6749.errors import InvalidGrantError
from mirri.entities.strain import ValidationError
class BiolomicsClient:
    """Low level REST client for the Biolomics web service.

    Handles OAuth2 password-grant authentication, URL building and the raw
    HTTP verbs; methods return the raw ``requests.Response`` objects.
    """
    # Cached at class level so every instance shares one schema download.
    schemas = None
    allowed_fields = None

    def __init__(self, server_url, api_version, client_id, client_secret,
                 username, password, website_id=1, verbose=False):
        self._client_id = client_id
        self._client_secret = client_secret
        self._username = username
        self._password = password
        self._client = None
        self.server_url = server_url
        self._api_version = api_version
        self._auth_url = self.server_url + "/connect/token"
        self.access_token = None
        self.website_id = website_id
        self._verbose = verbose
        # Fetch the schemas eagerly; this also authenticates, so bad
        # credentials fail fast at construction time.
        self._schema = self.get_schemas()
    def get_access_token(self):
        """Return a valid OAuth2 access token, fetching a fresh one when no
        token is cached or the cached one has expired."""
        if self._client is None:
            self._client = LegacyApplicationClient(client_id=self._client_id)
            authenticated = False
        else:
            expires_at = self._client.token["expires_at"]
            authenticated = expires_at > time.time()
        if not authenticated:
            oauth = OAuth2Session(client=self._client)
            try:
                token = oauth.fetch_token(
                    token_url=self._auth_url,
                    username=self._username,
                    password=self._password,
                    client_id=self._client_id,
                    client_secret=self._client_secret,
                )
            except InvalidGrantError:
                # Close the session before propagating bad-credential errors.
                oauth.close()
                raise
            self.access_token = token["access_token"]
            oauth.close()
        return self.access_token

    def _build_headers(self):
        """Build the common request headers, refreshing the token first."""
        self.get_access_token()
        return {
            "accept": "application/json",
            "websiteId": str(self.website_id),
            "Authorization": f"Bearer {self.access_token}",
        }
    def get_detail_url(self, end_point, record_id, api_version=None):
        """URL of a single record; the API version segment is included only
        when *api_version* is given."""
        if api_version:
            return "/".join([self.server_url, api_version, 'data',
                             end_point, str(record_id)])
        else:
            return "/".join([self.server_url, 'data', end_point, str(record_id)])

    def get_list_url(self, end_point):
        """URL of an endpoint's record collection (no API version segment)."""
        return "/".join([self.server_url, 'data', end_point])

    def get_search_url(self, end_point):
        """URL of the versioned search endpoint."""
        return "/".join([self.server_url, self._api_version, 'search', end_point])

    def get_find_by_name_url(self, end_point):
        """URL of the versioned search-by-name endpoint."""
        return "/".join([self.get_search_url(end_point), 'findByName'])
    def search(self, end_point, search_query):
        """POST *search_query* to the search endpoint; returns the raw
        response.  Request timing is printed when verbose."""
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_search_url(end_point)
        time0 = time.time()
        response = requests.post(url, json=search_query, headers=header)
        time1 = time.time()
        if self._verbose:
            sys.stdout.write(f'Search to {end_point} request time for {url}: {time1 - time0}\n')
        return response

    def retrieve(self, end_point, record_id):
        """GET a single record by id; returns the raw response."""
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id, api_version=self._api_version)
        time0 = time.time()
        response = requests.get(url, headers=header)
        time1 = time.time()
        if self._verbose:
            sys.stdout.write(f'Get to {end_point} request time for {url}: {time1-time0}\n')
        return response

    def create(self, end_point, data):
        """POST *data* to create a record, validating it against the schema
        first; returns the raw response."""
        self._check_end_point_exists(end_point)
        self._check_data_consistency(data, self.allowed_fields[end_point])
        header = self._build_headers()
        url = self.get_list_url(end_point)
        return requests.post(url, json=data, headers=header)

    def update(self, end_point, record_id, data):
        """PUT *data* to update an existing record, validating it against
        the schema first; returns the raw response."""
        self._check_end_point_exists(end_point)
        self._check_data_consistency(data, self.allowed_fields[end_point],
                                     update=True)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id=record_id)
        return requests.put(url, json=data, headers=header)

    def delete(self, end_point, record_id):
        """DELETE the record with *record_id*; returns the raw response."""
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_detail_url(end_point, record_id)
        return requests.delete(url, headers=header)

    def find_by_name(self, end_point, name):
        """GET the record whose name is *name*; returns the raw response."""
        self._check_end_point_exists(end_point)
        header = self._build_headers()
        url = self.get_find_by_name_url(end_point)
        response = requests.get(url, headers=header, params={'name': name})
        return response
def get_schemas(self):
if self.schemas is None:
headers = self._build_headers()
url = self.server_url + '/schemas'
response = requests.get(url, headers=headers)
if response.status_code == 200:
self.schemas = response.json()
else:
raise ValueError(f"{response.status_code}: {response.text}")
if self.allowed_fields is None:
self.allowed_fields = self._process_schema(self.schemas)
return self.schemas
@staticmethod
def _process_schema(schemas):
    """Index the first schema's table views: view name -> {field title: field}."""
    first_schema = schemas[0]
    return {view['TableViewName']: {field['title']: field
                                    for field in view['ResultFields']}
            for view in first_schema['TableViews']}
def _check_end_point_exists(self, endpoint):
    """Raise ValueError when *endpoint* is not one of the known endpoints."""
    # membership test directly on the dict; `.keys()` was redundant
    if endpoint not in self.allowed_fields:
        raise ValueError(f'{endpoint} not a recognised endpoint')
def _check_data_consistency(self, data, allowed_fields, update=False):
    """Validate payload keys and every RecordDetails field against the schema."""
    if update:
        # an update must carry all three identifying keys
        mandatory = {'RecordDetails', 'RecordName', 'RecordId'}
        if not mandatory.issubset(data.keys()):
            msg = 'Updating data keys must be RecordDetails, RecordName and RecordId'
            raise ValidationError(msg)
    else:
        # a create may only carry these keys
        unexpected = set(data.keys()) - {'RecordDetails', 'RecordName', 'Acronym'}
        if unexpected:
            msg = 'data keys must be RecordDetails and RecordName or Acronym'
            raise ValidationError(msg)
    for field_name, field_value in data['RecordDetails'].items():
        if field_name not in allowed_fields:
            raise ValidationError(f'{field_name} not in allowed fields')
        self._check_field_schema(field_name, allowed_fields[field_name],
                                 field_value)
@staticmethod
def _check_field_schema(field_name, field_schema, field_value):
    """Check one field's value against its schema entry.

    Verifies the FieldType matches and, when the schema restricts the field
    to a closed list of states (optionally per subfield), that the supplied
    value(s) belong to that list.  Raises ValidationError otherwise.
    """
    if field_schema['FieldType'] != field_value['FieldType']:
        msg = f"Bad FieldType ({field_value['FieldType']}) for {field_name}. "
        msg += f"It should be {field_schema['FieldType']}"
        raise ValidationError(msg)
    states = field_schema.get('states')
    if states:
        # strip parenthesised qualifiers, e.g. "Yes (verified)" -> "Yes"
        states = [re.sub(r" *\(.*\)", "", s) for s in states]
    subfields = field_schema.get('subfields')
    if subfields is not None and states is not None:
        subfield_names = [subfield['SubFieldName']
                          for subfield in subfields if subfield['IsUsed']]
        for val in field_value['Value']:
            if val['Name'] not in subfield_names:
                msg = f"{field_name}: {val['Name']} not in {subfield_names}"
                raise ValidationError(msg)
            if val['Value'] not in states:
                # BUG FIX: report the offending subfield value (val['Value']),
                # not the whole field value list
                msg = f"{val['Value']} not a valid value for "
                msg += f"{field_name}, Allowed values: {'. '.join(states)}"
                raise ValidationError(msg)
    elif states is not None:
        if field_value['Value'] not in states:
            msg = f"{field_value['Value']} not a valid value for "
            msg += f"{field_name}, Allowed values: {'. '.join(states)}"
            raise ValidationError(msg)
def rollback(self, created_ids):
    """Best-effort deletion of records created during a failed batch upload."""
    for endpoint, record_id in created_ids:
        try:
            self.delete(end_point=endpoint, record_id=record_id)
        except Exception:
            # deliberately swallowed: one failed delete must not stop
            # the rest of the rollback
            pass

View File

@ -0,0 +1,3 @@
# Top-level keys used by every Biolomics web-service JSON payload.
RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'
RECORD_DETAILS = 'RecordDetails'

View File

@ -0,0 +1,82 @@
from typing import List
from mirri import rgetattr
from mirri.entities.publication import Publication
from mirri.biolomics.settings import PUB_MIRRI_FIELDS
RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'
# Publication attribute name -> Biolomics bibliography field label.
# Commented-out entries exist in the web service but are not handled yet.
PUB_MAPPING = {
    # 'record_id': 'RecordId',
    # 'record_name': 'RecordName',
    'strains': "Associated strains",
    'taxa': "Associated taxa",
    'authors': "Authors",
    # 'sequneces': "Associated sequences",
    # 'abstract': "Abstract",
    # 'collection': "Collection",
    'doi': "DOI number",
    'editor': "Editor(s)",
    # 'full_reference': "Full reference",
    # 'link': "Hyperlink",
    'isbn': "ISBN",
    'issn': "ISSN",
    'issue': "Issue",
    'journal': "Journal",
    'journal_book': "Journal-Book",
    # 'keywords': "Keywords",
    'first_page': "Page from",
    'last_page': "Page to",
    'publisher': "Publisher",
    'pubmed_id': "PubMed ID",
    'volume': "Volume",
    'year': "Year",
}
# Reverse lookup: Biolomics field label -> Publication attribute name.
REV_PUB_MAPPING = {v: k for k, v in PUB_MAPPING.items()}
def serializer_from_biolomics(ws_data, client=None) -> Publication:
    """Build a Publication from a Biolomics bibliography record payload."""
    publication = Publication()
    publication.record_id = ws_data[RECORD_ID]
    publication.record_name = ws_data[RECORD_NAME]
    # the record name doubles as the publication title
    publication.title = ws_data[RECORD_NAME]
    for field_label, content in ws_data['RecordDetails'].items():
        field_value = content['Value']
        attr = REV_PUB_MAPPING.get(field_label)
        if not field_value or attr is None:
            continue
        if attr in ('year', 'first_page', 'last_page'):
            field_value = int(field_value)
        setattr(publication, attr, field_value)
    return publication
def get_publication_record_name(publication):
    """Pick a record name: record_name, title, PubMed id or DOI, in that order.

    Returns None when none of the candidates is set.
    """
    for candidate in (publication.record_name, publication.title):
        if candidate:
            return candidate
    if publication.pubmed_id:
        return f'PUBMED:{publication.pubmed_id}'
    if publication.doi:
        return f'DOI:{publication.doi}'
def serializer_to_biolomics(publication: Publication, client=None, update=False):
    """Serialize a Publication into the Biolomics JSON structure."""
    ws_data = {}
    if publication.record_id:
        ws_data[RECORD_ID] = publication.record_id
    ws_data[RECORD_NAME] = get_publication_record_name(publication)
    details = {}
    for attr, ws_field in PUB_MAPPING.items():
        attr_value = getattr(publication, attr, None)
        if attr_value is None:
            continue
        # 'year' is a date-like field in the web service; the rest are text
        kind = 'D' if attr == 'year' else "E"
        details[ws_field] = {'Value': attr_value, 'FieldType': kind}
    ws_data['RecordDetails'] = details
    return ws_data

View File

@ -0,0 +1,66 @@
from mirri.biolomics.serializers import RECORD_ID, RECORD_NAME, RECORD_DETAILS
from mirri.entities.growth_medium import GrowthMedium
def serialize_from_biolomics(ws_data, client=None) -> GrowthMedium:
    """Build a GrowthMedium from a Biolomics growth-medium record payload."""
    medium = GrowthMedium()
    medium.record_name = ws_data.get('RecordName', None)
    # default description falls back to the record name (or acronym)
    medium.description = get_growth_medium_record_name(medium)
    medium.record_id = ws_data.get('RecordId', None)
    attr_by_field = {
        "Full description": 'full_description',
        "Ingredients": 'ingredients',
        'Medium description': 'description',
        'Other name': 'other_name',
        'pH': 'ph',
        'Sterilization conditions': 'sterilization_conditions',
    }
    for field_label, content in ws_data['RecordDetails'].items():
        field_value = content['Value']
        if not field_value:
            continue
        attr = attr_by_field.get(field_label)
        if attr is not None:
            setattr(medium, attr, field_value)
    return medium
def get_growth_medium_record_name(growth_medium):
    """Pick a record name: record_name, description or acronym; first set wins.

    Returns None when none of them is set.
    """
    for candidate in (growth_medium.record_name,
                      growth_medium.description,
                      growth_medium.acronym):
        if candidate:
            return candidate
# GrowthMedium attribute name -> Biolomics growth-medium field label.
GROWTH_MEDIUM_MAPPING = {
    'acronym': 'Acronym',
    'full_description': "Full description",
    'ingredients': "Ingredients",
    'description': 'Medium description',
    'other_name': 'Other name',
    'ph': 'pH',
    'sterilization_conditions': 'Sterilization conditions'
}
def serialize_to_biolomics(growth_medium: GrowthMedium, client=None, update=False):
    """Serialize a GrowthMedium into the Biolomics JSON structure."""
    ws_data = {}
    if growth_medium.record_id:
        ws_data[RECORD_ID] = growth_medium.record_id
    ws_data[RECORD_NAME] = get_growth_medium_record_name(growth_medium)
    details = {}
    for attr in growth_medium.fields:
        # identity-like fields are carried at the top level, not as details
        if attr in ('acronym', 'record_id', 'record_name'):
            continue
        attr_value = getattr(growth_medium, attr, None)
        if attr_value is not None:
            details[GROWTH_MEDIUM_MAPPING[attr]] = {'Value': attr_value,
                                                    'FieldType': 'E'}
    ws_data[RECORD_DETAILS] = details
    return ws_data

View File

@ -0,0 +1,26 @@
from mirri.entities.location import Location
def serialize_from_biolomics(ws_data, client=None):
    # Locations are kept as the raw web-service payload for now.
    return ws_data
# this is a proof of concept
def serialize_location(location: 'Location'):
    """Serialize a Location into a Biolomics record structure.

    Only country and GIS position are mapped; the strain RLink is left as
    an empty placeholder to be filled in later.
    """
    fields = {}
    if location.country:
        fields['Country'] = {'Value': location.country, 'FieldType': 'E'}
    # BUG FIX: explicit None checks so 0.0 (equator / Greenwich meridian)
    # counts as a valid coordinate; truthiness dropped it before
    if location.latitude is not None and location.longitude is not None:
        value = {'Latitude': location.latitude,
                 'Longitude': location.longitude}
        if location.coord_uncertainty:
            value['Precision'] = location.coord_uncertainty
        fields['GIS position'] = {'FieldType': 'L', 'Value': value}
    fields['Strains'] = {"FieldType": "RLink", 'Value': [{
        'Name': {'Value': None, 'FieldType': "E"},
        'RecordId': None
    }]}
    return {"RecordDetails": fields,
            "RecordName": location.country}

View File

@ -0,0 +1,2 @@
def serialize_from_biolomics(ws_data, client=None):
    # Pass-through: this entity is kept as the raw web-service payload.
    return ws_data

View File

@ -0,0 +1,81 @@
from mirri.entities.sequence import GenomicSequence
from mirri.biolomics.serializers import RECORD_ID, RECORD_NAME, RECORD_DETAILS
class GenomicSequenceBiolomics(GenomicSequence):
    """GenomicSequence extended with Biolomics record id/name bookkeeping."""
    def __init__(self, **kwargs):
        # freeze=False so the Biolomics-only keys can be added to _data
        super().__init__(freeze=False, **kwargs)
    @property
    def record_id(self) -> int:
        """Biolomics record id, or None when not stored remotely yet."""
        return self._data.get(RECORD_ID, None)
    @record_id.setter
    def record_id(self, value: int):
        self._data[RECORD_ID] = value
    @property
    def record_name(self) -> str:
        """Biolomics record name, or None when not stored remotely yet."""
        return self._data.get(RECORD_NAME, None)
    @record_name.setter
    def record_name(self, value: str):
        self._data[RECORD_NAME] = value
    def dict(self):
        """Return the parent dict augmented with record id/name when set."""
        _data = super(GenomicSequenceBiolomics, self).dict()
        if self.record_id:
            _data[RECORD_ID] = self.record_id
        if self.record_name:
            _data[RECORD_NAME] = self.record_name
        return _data
def serialize_to_biolomics(marker: GenomicSequenceBiolomics, client=None, update=False):
    """Serialize a genomic sequence marker into the Biolomics JSON structure."""
    # BUG FIX: removed a stray debug print() that wrote a blank line to stdout
    ws_sequence = {}
    if marker.record_id:
        ws_sequence[RECORD_ID] = marker.record_id
    # fall back to the INSDC accession when no record name is known yet
    if marker.record_name:
        ws_sequence[RECORD_NAME] = marker.record_name
    else:
        ws_sequence[RECORD_NAME] = marker.marker_id
    details = {}
    if marker.marker_id:
        details["INSDC number"] = {"Value": marker.marker_id,
                                   "FieldType": "E"}
    if marker.marker_seq:
        details["DNA sequence"] = {
            "Value": {"Sequence": marker.marker_seq},
            "FieldType": "N"}
    if marker.marker_type:
        details['Marker name'] = {"Value": marker.marker_type, "FieldType": "E"}
    ws_sequence[RECORD_DETAILS] = details
    return ws_sequence
# Web-service marker names -> spec marker type codes.
MAPPING_WS_SPEC_TYPES = {
    'Beta tubulin': 'TUBB'
}
def serialize_from_biolomics(ws_data, client=None) -> GenomicSequenceBiolomics:
    """Build a GenomicSequenceBiolomics from a Biolomics sequence record."""
    marker = GenomicSequenceBiolomics()
    marker.record_id = ws_data[RECORD_ID]
    marker.record_name = ws_data[RECORD_NAME]
    for field_label, content in ws_data['RecordDetails'].items():
        field_value = content['Value']
        if field_label == 'INSDC number' and field_value:
            marker.marker_id = field_value
        elif field_label == 'Marker name' and field_value:
            # translate web-service marker names to spec codes when known
            marker.marker_type = MAPPING_WS_SPEC_TYPES.get(field_value,
                                                           field_value)
        elif (field_label == 'DNA sequence' and 'Sequence' in field_value
                and field_value['Sequence']):
            marker.marker_seq = field_value['Sequence']
    return marker

View File

@ -0,0 +1,462 @@
import re
import sys
import pycountry
from mirri import rgetattr, rsetattr
from mirri.entities.date_range import DateRange
from mirri.entities.strain import ORG_TYPES, OrganismType, StrainId, StrainMirri, add_taxon_to_strain
from mirri.biolomics.remote.endoint_names import (GROWTH_MEDIUM_WS, TAXONOMY_WS,
ONTOBIOTOPE_WS, BIBLIOGRAPHY_WS, SEQUENCE_WS, COUNTRY_WS)
from mirri.settings import (
ALLOWED_FORMS_OF_SUPPLY,
NAGOYA_PROBABLY_SCOPE,
NAGOYA_NO_RESTRICTIONS,
NAGOYA_DOCS_AVAILABLE,
NO_RESTRICTION,
ONLY_RESEARCH,
COMMERCIAL_USE_WITH_AGREEMENT,
)
from mirri.biolomics.settings import MIRRI_FIELDS
from mirri.utils import get_pycountry
# Spec Nagoya-restriction codes -> Biolomics state names (and reverse).
NAGOYA_TRANSLATOR = {
    NAGOYA_NO_RESTRICTIONS: "no known restrictions under the Nagoya protocol",
    NAGOYA_DOCS_AVAILABLE: "documents providing proof of legal access and terms of use available at the collection",
    NAGOYA_PROBABLY_SCOPE: "strain probably in scope, please contact the culture collection",
}
REV_NAGOYA_TRANSLATOR = {v: k for k, v in NAGOYA_TRANSLATOR.items()}
# Spec restriction-on-use codes -> Biolomics state names (and reverse).
RESTRICTION_USE_TRANSLATOR = {
    NO_RESTRICTION: "no restriction apply",
    ONLY_RESEARCH: "for research use only",
    COMMERCIAL_USE_WITH_AGREEMENT: "for commercial development a special agreement is requested",
}
REV_RESTRICTION_USE_TRANSLATOR = {v: k for k,
                                  v in RESTRICTION_USE_TRANSLATOR.items()}
# Field labels that get special value handling during (de)serialization.
DATE_TYPE_FIELDS = ("Date of collection", "Date of isolation",
                    "Date of deposit", "Date of inclusion in the catalogue")
BOOLEAN_TYPE_FIELDS = ("Strain from a registered collection", "Dual use",
                       "Quarantine in Europe", "Interspecific hybrid")  # , 'GMO')
FILE_TYPE_FIELDS = ("MTA file", "ABS related files")
MAX_MIN_TYPE_FIELDS = ("Tested temperature growth range",
                       "Recommended growth temperature")
# Labels whose list values are stored remotely as a '; '-joined string.
LIST_TYPES_TO_JOIN = ('Other denomination', 'Plasmids collections fields', 'Plasmids')
# Spec marker type -> Biolomics strain field holding that marker's sequences.
MARKER_TYPE_MAPPING = {
    '16S rRNA': 'Sequences 16s',  # or Sequences c16S rRNA
    'ACT': 'Sequences ACT',
    'CaM': 'Sequences CaM',
    'EF-1α': 'Sequences TEF1a',
    'ITS': 'Sequences ITS',
    'LSU': 'Sequences LSU',
    'RPB1': 'Sequences RPB1',
    'RPB2': 'Sequences RPB2',
    'TUBB': 'Sequences TUB'  # or Sequences Beta tubulin
}
def serialize_to_biolomics(strain: StrainMirri, client=None, update=False,
                           log_fhand=None):  # sourcery no-metrics
    """Serialize a StrainMirri into the Biolomics strain JSON structure.

    Walks MIRRI_FIELDS, translating each spec attribute into its Biolomics
    field; entries without a "biolomics" config are skipped.  When *client*
    is given, cross-referenced entities (taxa, growth media, countries,
    ontobiotopes, publications, markers) are resolved to RLinks against the
    live web service; otherwise those fields are skipped.  Warnings about
    unresolved references are written to *log_fhand* (defaults to stdout).
    """
    if log_fhand is None:
        log_fhand = sys.stdout
    strain_record_details = {}
    for field in MIRRI_FIELDS:
        try:
            biolomics_field = field["biolomics"]["field"]
            biolomics_type = field["biolomics"]["type"]
        except KeyError:
            # print(f'biolomics not configured: {field["label"]}')
            continue
        label = field["label"]
        attribute = field["attribute"]
        value = rgetattr(strain, attribute, None)
        if value is None:
            continue
        if label == "Accession number":
            value = f"{strain.id.collection} {strain.id.number}"
        if label == "Restrictions on use":
            value = RESTRICTION_USE_TRANSLATOR[value]
        elif label == "Nagoya protocol restrictions and compliance conditions":
            value = NAGOYA_TRANSLATOR[value]
        elif label in FILE_TYPE_FIELDS:
            value = [{"Name": "link", "Value": fname} for fname in value]
        elif label == "Other culture collection numbers":
            value = "; ".join(on.strain_id for on in value) if value else None
        elif label in BOOLEAN_TYPE_FIELDS:
            value = 'yes' if value else 'no'
        # NOTE(review): `label in 'GMO'` is a substring test, not equality;
        # it only behaves like `label == 'GMO'` because no other label is a
        # substring of 'GMO'.  Probably meant `==` -- confirm.
        elif label in 'GMO':
            value = 'Yes' if value else 'No'
        elif label == "Organism type":
            # one yes/no entry per known organism type
            org_types = [ot.name for ot in value]
            value = []
            for ot in ORG_TYPES.keys():
                is_organism = "yes" if ot in org_types else "no"
                value.append({"Name": ot, "Value": is_organism})
        elif label == 'Taxon name':
            if client:
                # resolve every rank of the long name to a remote taxon RLink
                taxa = strain.taxonomy.long_name.split(';')
                value = []
                for taxon_name in taxa:
                    taxon = get_remote_rlink(client, TAXONOMY_WS,
                                             taxon_name)
                    if taxon:
                        value.append(taxon)
                if not value:
                    msg = f'WARNING: {strain.taxonomy.long_name} not found in database'
                    log_fhand.write(msg + '\n')
                    # TODO: decide to raise or not if taxon not in MIRRI DB
                    #raise ValueError(msg)
        elif label in DATE_TYPE_FIELDS:
            # NOTE(review): reaches into DateRange privates; missing month or
            # day default to 1 (January / the 1st)
            year = value._year
            month = value._month or 1
            day = value._day or 1
            if year is None:
                continue
            value = f"{year}-{month:02}-{day:02}"
        elif label == 'History of deposit':
            value = " < ".join(value)
        elif label in MAX_MIN_TYPE_FIELDS:
            # a scalar means max == min; otherwise a {'max': .., 'min': ..} dict
            if isinstance(value, (int, float, str)):
                _max, _min = float(value), float(value)
            else:
                _max, _min = float(value['max']), float(value['min'])
            content = {"MaxValue": _max, "MinValue": _min,
                       "FieldType": biolomics_type}
            strain_record_details[biolomics_field] = content
            continue
        elif label in LIST_TYPES_TO_JOIN:
            value = '; '.join(value)
        # TODO: Check how to deal with crossrefs
        elif label == "Recommended medium for growth":
            if client is not None:
                ref_value = []
                for medium in value:
                    ws_gm = client.retrieve_by_name(GROWTH_MEDIUM_WS, medium)
                    if ws_gm is None:
                        raise ValueError(
                            f'Can not find the growth medium: {medium}')
                    gm = {"Name": {"Value": medium, "FieldType": "E"},
                          "RecordId": ws_gm.record_id}
                    ref_value.append(gm)
                value = ref_value
            else:
                continue
        elif label == "Form of supply":
            # one yes/no entry per allowed form of supply
            _value = []
            for form in ALLOWED_FORMS_OF_SUPPLY:
                is_form = "yes" if form in value else "no"
                _value.append({"Name": form, "Value": is_form})
            value = _value
            # print(label, value), biolomics_field
        elif label == "Coordinates of geographic origin":
            value = {'Latitude': strain.collect.location.latitude,
                     'Longitude': strain.collect.location.longitude}
            precision = strain.collect.location.coord_uncertainty
            if precision is not None:
                value['Precision'] = precision
        elif label == "Geographic origin":
            # country goes to its own RLink field; the rest of the location
            # (state/municipality/site) is joined into the text field
            if client is not None and value.country is not None:
                country = get_pycountry(value.country)
                if country is None:
                    log_fhand.write(f'WARNING: {value.country} Not a valida country code/name\n')
                else:
                    _value = get_country_record(country, client)
                    if _value is None:  # TODO: Remove this once the countries are added to the DB
                        msg = f'WARNING: {value.country} not in MIRRI DB'
                        log_fhand.write(msg + '\n')
                        #raise ValueError(msg)
                    else:
                        content = {"Value": [_value], "FieldType": "RLink"}
                        strain_record_details['Country'] = content
            _value = []
            for sector in ('state', 'municipality', 'site'):
                sector_val = getattr(value, sector, None)
                if sector_val:
                    _value.append(sector_val)
            value = "; ".join(_value) if _value else None
            if value is None:
                continue
        elif label == "Ontobiotope":
            if client and value:
                onto = get_remote_rlink(client, ONTOBIOTOPE_WS, value)
                value = [onto] if onto is not None else None
        elif label == 'Literature':
            if client and value:
                pub_rlinks = []
                for pub in value:
                    rlink = get_remote_rlink(client, BIBLIOGRAPHY_WS, pub.title)
                    if rlink:
                        pub_rlinks.append(rlink)
                if pub_rlinks:
                    value = pub_rlinks
                else:
                    continue
        # NOTE(review): dead branch, label is never the empty string
        elif label == '':
            pass
        elif label == 'Ploidy':
            value = _translate_polidy(value)
        if value is not None:
            content = {"Value": value, "FieldType": biolomics_type}
            strain_record_details[biolomics_field] = content
    # if False:
    #     record_details["Data provided by"] = {
    #         "Value": strain.id.collection, "FieldType": "V"}
    # Markers
    if client:
        add_markers_to_strain_details(client, strain, strain_record_details)
    strain_structure = {"RecordDetails": strain_record_details}
    if update:
        strain_structure['RecordId'] = strain.record_id
        strain_structure['RecordName'] = strain.record_name
    else:
        strain_structure["Acronym"] = "MIRRI"
    return strain_structure
def add_markers_to_strain_details(client, strain: StrainMirri, details):
    """Resolve each strain marker remotely and add it as an NLink to *details*.

    Markers that cannot be found in the web service are skipped with a
    message on stdout.  Mutates *details* in place.
    """
    for marker in strain.genetics.markers:
        marker_name = marker.marker_id
        marker_in_ws = client.retrieve_by_name(SEQUENCE_WS, marker_name)
        if marker_in_ws is None:
            # NOTE(review): debug-style print; consider the log_fhand used
            # by serialize_to_biolomics instead
            print('Marker not in web service')
            continue
        marker_type = marker.marker_type
        ws_marker = {
            "Value": [{
                "Name": {"Value": marker_in_ws.record_name,
                         "FieldType": "E"},
                "RecordId": marker_in_ws.record_id
            }],
            "FieldType": "NLink"
        }
        # attach the sequence itself when the remote record carries one
        if marker_in_ws.marker_seq:
            ws_marker['Value'][0]["TargetFieldValue"] = {
                "Value": {"Sequence": marker_in_ws.marker_seq},
                "FieldType": "N"
            }
        details[MARKER_TYPE_MAPPING[marker_type]] = ws_marker
def get_remote_rlink(client, endpoint, record_name):
    """Look up *record_name* at *endpoint* and return an RLink dict, or None."""
    entity = client.retrieve_by_name(endpoint, record_name)
    if not entity:
        return None
    # some Endpoints does not serialize the json into a python object yet
    try:
        name, rec_id = entity.record_name, entity.record_id
    except AttributeError:
        name, rec_id = entity["RecordName"], entity["RecordId"]
    return {"Name": {"Value": name, "FieldType": "E"},
            "RecordId": rec_id}
def add_strain_rlink_to_entity(record, strain_id, strain_name):
    """Attach an RLink pointing at a strain to *record*'s details; return it."""
    rlink_value = [{'Name': {'Value': strain_name, 'FieldType': "E"},
                    'RecordId': strain_id}]
    record['RecordDetails']['Strains'] = {"FieldType": "RLink",
                                          'Value': rlink_value}
    return record
# Numeric spec ploidy codes -> Biolomics state names (and reverse).
PLOIDY_TRANSLATOR = {
    0: 'Aneuploid',
    1: 'Haploid',
    2: 'Diploid',
    3: 'Triploid',
    4: 'Tetraploid',
    9: 'Polyploid'
}
REV_PLOIDY_TRANSLATOR = {v: k for k, v in PLOIDY_TRANSLATOR.items()}
def _translate_polidy(ploidy):
    """Translate a numeric ploidy code into its Biolomics state name.

    Returns '?' when the value is missing or not numeric, and 'Polyploid'
    for any numeric code without an explicit mapping.
    """
    try:
        ploidy = int(ploidy)
    except (TypeError, ValueError):
        # BUG FIX: int('not-a-number') raises ValueError, which previously
        # escaped because only TypeError was caught
        return '?'
    return PLOIDY_TRANSLATOR.get(ploidy, 'Polyploid')
def serialize_from_biolomics(biolomics_strain, client=None):  # sourcery no-metrics
    """Build a StrainMirri from a Biolomics strain record payload.

    Walks MIRRI_FIELDS in reverse of serialize_to_biolomics, converting each
    Biolomics field back into the spec attribute.  When *client* is given,
    literature and sequence-marker RLinks are resolved to full records.
    """
    strain = StrainMirri()
    strain.record_id = biolomics_strain.get('RecordId', None)
    strain.record_name = biolomics_strain.get('RecordName', None)
    for field in MIRRI_FIELDS:
        try:
            biolomics_field = field["biolomics"]["field"]
        except KeyError:
            # print(f'biolomics not configured: {field["label"]}')
            continue
        label = field["label"]
        attribute = field["attribute"]
        field_data = biolomics_strain['RecordDetails'].get(biolomics_field, None)
        if field_data is None:
            continue
        is_empty = field_data.get('IsEmpty')
        if is_empty:
            continue
        if biolomics_field in ('Tested temperature growth range', 'Recommended growth temperature'):
            value = {'max': field_data.get('MaxValue', None),
                     'min': field_data.get('MinValue', None)}
        else:
            value = field_data['Value']
        # if value in (None, '', [], {}, '?', 'Unknown', 'nan', 'NaN'):
        #     continue
        # print(label, attribute, biolomics_field, value)
        if label == 'Accession number':
            # keep the MIRRI record name as a synonym id
            number = strain.record_name
            mirri_id = StrainId(number=number)
            strain.synonyms = [mirri_id]
            coll, num = value.split(' ', 1)
            accession_number_id = StrainId(collection=coll, number=num)
            strain.id = accession_number_id
            continue
        elif label == "Restrictions on use":
            value = REV_RESTRICTION_USE_TRANSLATOR[value]
        elif label == 'Nagoya protocol restrictions and compliance conditions':
            value = REV_NAGOYA_TRANSLATOR[value]
        elif label in FILE_TYPE_FIELDS:
            value = [f['Value'] for f in value]
        elif label == "Other culture collection numbers":
            other_numbers = []
            for on in value.split(";"):
                on = on.strip()
                try:
                    collection, number = on.split(" ", 1)
                except ValueError:
                    # no space: treat the whole token as the number
                    collection = None
                    number = on
                _id = StrainId(collection=collection, number=number)
                other_numbers.append(_id)
            value = other_numbers
        elif label in BOOLEAN_TYPE_FIELDS:
            value = value == 'yes'
        elif label == 'GMO':
            value = value == 'Yes'
        elif label == "Organism type":
            organism_types = [OrganismType(item['Name']) for item in value if item['Value'] == 'yes']
            if organism_types:
                value = organism_types
        # NOTE(review): `in 'Taxon name'` is a substring test; works only
        # because no other label is a substring of 'Taxon name' -- probably
        # meant `==`
        elif label in 'Taxon name':
            value = ";".join([v['Name']['Value'] for v in value])
            add_taxon_to_strain(strain, value)
            continue
        elif label in DATE_TYPE_FIELDS:
            # date_range = DateRange()
            value = DateRange().strpdate(value)
        elif label in ("Recommended growth temperature",
                       "Tested temperature growth range"):
            # NOTE(review): the mixed `or`/`and` makes the min clause always
            # False; likely meant `value['min'] is None or value['min'] == 0`
            if (value['max'] is None or value['max'] == 0 or
                    value['min'] is None and value['min'] == 0):
                continue
        elif label == "Recommended medium for growth":
            value = [v['Name']['Value'] for v in value]
        elif label == "Form of supply":
            value = [item['Name'] for item in value if item['Value'] == 'yes']
        elif label in LIST_TYPES_TO_JOIN:
            value = [v.strip() for v in value.split(";")]
        elif label == "Coordinates of geographic origin":
            if ('Longitude' in value and 'Latitude' in value and
                    isinstance(value['Longitude'], float) and
                    isinstance(value['Latitude'], float)):
                strain.collect.location.longitude = value['Longitude']
                strain.collect.location.latitude = value['Latitude']
                # NOTE(review): raises KeyError when 'Precision' is absent --
                # confirm the web service always sends it with coordinates
                if value['Precision'] != 0:
                    strain.collect.location.coord_uncertainty = value['Precision']
            continue
        elif label == "Altitude of geographic origin":
            value = float(value)
        elif label == "Geographic origin":
            strain.collect.location.site = value
            continue
        elif label == 'Ontobiotope':
            # extract the OBT accession from the linked record's name
            try:
                value = re.search("(OBT:[0-9]{5,7})", value[0]['Name']['Value']).group()
            except (KeyError, IndexError, AttributeError):
                continue
        elif label == 'Ploidy':
            value = REV_PLOIDY_TRANSLATOR[value]
        elif label == 'Literature':
            if client is not None:
                pubs = []
                for pub in value:
                    pub = client.retrieve_by_id(BIBLIOGRAPHY_WS, pub['RecordId'])
                    pubs.append(pub)
                value = pubs
        rsetattr(strain, attribute, value)
    # fields that are not in MIRRI FIELD list
    # country
    if 'Country' in biolomics_strain['RecordDetails'] and biolomics_strain['RecordDetails']['Country']:
        try:
            country_name = biolomics_strain['RecordDetails']['Country']['Value'][0]['Name']['Value']
            country = get_pycountry(country_name)
            country_3 = country.alpha_3 if country else None
        except (IndexError, KeyError):
            country_3 = None
        if country_3:
            strain.collect.location.country = country_3
    # Markers:
    if client:
        markers = []
        for marker_type, biolomics_marker in MARKER_TYPE_MAPPING.items():
            try:
                marker_value = biolomics_strain['RecordDetails'][biolomics_marker]['Value']
            except KeyError:
                continue
            if not marker_value:
                continue
            for marker in marker_value:
                record_id = marker['RecordId']
                marker = client.retrieve_by_id(SEQUENCE_WS, record_id)
                if marker is not None:
                    markers.append(marker)
        if markers:
            strain.genetics.markers = markers
    return strain
def get_country_record(country, client):
    """Try the pycountry name variants until one resolves to a remote RLink.

    Returns None when no variant is known to the web service.
    """
    for name_attr in ('common_name', 'name', 'official_name'):
        candidate = getattr(country, name_attr, None)
        if candidate is None:
            continue
        rlink = get_remote_rlink(client, COUNTRY_WS, candidate)
        if rlink is not None:
            return rlink
    return None

View File

@ -0,0 +1,64 @@
from mirri.entities.strain import Taxonomy
#TODO this is all wrong, needs deep revision
class TaxonomyMirri(Taxonomy):
    """Taxonomy variant backed by a plain _data dict.

    NOTE(review): this class defines __init__ twice -- the second definition
    silently replaces the first (and never calls super().__init__), and its
    field list looks copied from the growth-medium entity; confirm intent.
    """
    def __init__(self, **kwargs):
        super().__init__(freeze=False, **kwargs)
    # allowed attribute names for the dict-backed storage
    fields = ['record_id', 'record_name', 'acronym', 'full_description',
              'ingredients', 'description', 'other_name', 'ph',
              'sterilization_conditions']
    def __init__(self, **kwargs):
        self._data = {}
        for field in self.fields:
            # NOTE(review): kwargs['field'] looks like a bug -- it reads the
            # literal key 'field'; probably kwargs[field] was intended
            if field in kwargs and kwargs['field'] is not None:
                value = kwargs['field']
                setattr(self, field, value)
    def __setattr__(self, attr, value):
        # _data itself must bypass the allow-list check
        if attr == '_data':
            super().__setattr__(attr, value)
            return
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        self._data[attr] = value
    def __getattr__(self, attr):
        # NOTE(review): returning `super` (the builtin) for '_data' is
        # suspicious; confirm this path is ever taken
        if attr == '_data':
            return super
        if attr not in self.fields and attr != '_data':
            raise TypeError(f'{attr} not an allowed attribute')
        return self._data.get(attr, None)
    def dict(self):
        """Return the raw attribute storage."""
        return self._data
def serialize_from_biolomics(ws_data, client=None) -> 'TaxonomyMirri':
    """Return the raw web-service taxonomy payload.

    TODO: build a real TaxonomyMirri from the payload.  The previous version
    carried an unreachable block after the return -- leftover growth-medium
    deserialization code referencing an undefined GrowthMedium name -- which
    has been removed.
    """
    return ws_data

373
mirri/biolomics/settings.py Normal file
View File

@ -0,0 +1,373 @@
# secrets.py is developer-provided (not committed); fail fast with an
# actionable message when it is missing.
try:
    from mirri.biolomics.secrets import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
except ImportError:
    raise ImportError(
        'You need a secrets.py in the project dir. with CLIENT_ID, SECRET_ID, USERNAME, PASSWORD')
# Spec field definitions: strain attribute path, excel label, whether the
# column is mandatory, and -- when the field is pushed to the web service --
# the matching Biolomics field name and field-type code.  Entries without a
# "biolomics" key are skipped by the web-service serializers.
MIRRI_FIELDS = [
    {
        "attribute": "id",
        "label": "Accession number",
        "mandatory": True,
        "biolomics": {"field": "Collection accession number", "type": "E"},
    },
    {
        "attribute": "restriction_on_use",
        "label": "Restrictions on use",
        "mandatory": True,
        "biolomics": {"field": "Restrictions on use", "type": "T"},
    },
    {
        "attribute": "nagoya_protocol",
        "label": "Nagoya protocol restrictions and compliance conditions",
        "mandatory": True,
        "biolomics": {"field": "Nagoya protocol restrictions and compliance conditions", "type": "T"},
    },
    {
        "attribute": "abs_related_files",
        "label": "ABS related files",
        "mandatory": False,
        "biolomics": {"field": "ABS related files", "type": "U"},
    },
    {
        "attribute": "mta_files",
        "label": "MTA file",
        "mandatory": False,
        "biolomics": {"field": "MTA files URL", "type": "U"},
    },
    {
        "attribute": "other_numbers",
        "label": "Other culture collection numbers",
        "mandatory": False,
        "biolomics": {"field": "Other culture collection numbers", "type": "E"},
    },
    {
        "attribute": "is_from_registered_collection",
        "label": "Strain from a registered collection",
        "mandatory": False,
        "biolomics": {"field": "Strain from a registered collection", "type": "T"},
    },
    {
        "attribute": "risk_group",
        "label": "Risk Group",
        "mandatory": True,
        "biolomics": {"field": "Risk group", "type": "T"},
    },
    {
        "attribute": "is_potentially_harmful",
        "label": "Dual use",
        "mandatory": False,
        "biolomics": {"field": "Dual use", "type": "T"},
    },
    {
        "attribute": "is_subject_to_quarantine",
        "label": "Quarantine in Europe",
        "mandatory": False,
        "biolomics": {"field": "Quarantine in Europe", "type": "T"},
    },
    {
        "attribute": "taxonomy.organism_type",
        "label": "Organism type",
        "mandatory": True,
        "biolomics": {"field": "Organism type", "type": "C"},
    },
    {
        "attribute": "taxonomy.long_name",
        "label": "Taxon name",
        "mandatory": True,
        "biolomics": {"field": "Taxon name", "type": "SynLink"},
    },
    {
        "attribute": "taxonomy.infrasubspecific_name",
        "label": "Infrasubspecific names",
        "mandatory": False,
        "biolomics": {"field": "Infrasubspecific names", "type": "E"},
    },
    {
        "attribute": "taxonomy.comments",
        "label": "Comment on taxonomy",
        "mandatory": False,
        "biolomics": {"field": "Comment on taxonomy", "type": "E"},
    },
    {
        "attribute": "taxonomy.interspecific_hybrid",
        "label": "Interspecific hybrid",
        "mandatory": False,
        "biolomics": {"field": "Interspecific hybrid", "type": "T"},
    },
    {
        "attribute": "status", "label": "Status", "mandatory": False,
        "biolomics": {"field": "Status", "type": "E"},
    },
    {
        "attribute": "history",
        "label": "History of deposit",
        "mandatory": False,
        "biolomics": {"field": "History", "type": "E"},
    },
    {
        "attribute": "deposit.who",
        "label": "Depositor",
        "mandatory": False,
        "biolomics": {"field": "Depositor", "type": "E"},
    },
    {
        "attribute": "deposit.date",
        "label": "Date of deposit",
        "mandatory": False,
        "biolomics": {"field": "Deposit date", "type": "H"},
    },
    {
        "attribute": "catalog_inclusion_date",
        "label": "Date of inclusion in the catalogue",
        "mandatory": False,
        "biolomics": {"field": "Date of inclusion in the catalogue", "type": "H"},
    },
    {
        "attribute": "collect.who",
        "label": "Collected by",
        "mandatory": False,
        "biolomics": {"field": "Collector", "type": "E"},
    },
    {
        "attribute": "collect.date",
        "label": "Date of collection",
        "mandatory": False,
        "biolomics": {"field": "Collection date", "type": "H"},
    },
    {
        "attribute": "isolation.who",
        "label": "Isolated by",
        "mandatory": False,
        "biolomics": {"field": "Isolator", "type": "E"},
    },
    {
        "attribute": "isolation.date",
        "label": "Date of isolation",
        "mandatory": False,
        "biolomics": {"field": "Isolation date", "type": "H"},
    },
    {
        "attribute": "isolation.substrate_host_of_isolation",
        "label": "Substrate/host of isolation",
        "mandatory": False,
        "biolomics": {"field": "Substrate of isolation", "type": "E"},
    },
    {
        "attribute": "growth.tested_temp_range",
        "label": "Tested temperature growth range",
        "mandatory": False,
        "biolomics": {"field": "Tested temperature growth range", "type": "S"},
    },
    {
        "attribute": "growth.recommended_temp",
        "label": "Recommended growth temperature",
        "mandatory": True,
        "biolomics": {"field": "Recommended growth temperature", "type": "S"},
    },
    {
        "attribute": "growth.recommended_media",
        "label": "Recommended medium for growth",
        "mandatory": True,
        "biolomics": {"field": "Recommended growth medium", "type": "RLink"},
    },
    {
        "attribute": "form_of_supply",
        "label": "Form of supply",
        "mandatory": True,
        "biolomics": {"field": "Form", "type": "C"},
    },
    {
        "attribute": "other_denominations",
        "label": "Other denomination",
        "mandatory": False,
        "biolomics": {"field": "Other denomination", "type": "E"},
    },
    {
        # here we use latitude to check if there is data in some of the fields
        "attribute": "collect.location.latitude",
        "label": "Coordinates of geographic origin",
        "mandatory": False,
        "biolomics": {"field": "Coordinates of geographic origin", "type": "L"},
    },
    {
        "attribute": "collect.location.altitude",
        "label": "Altitude of geographic origin",
        "mandatory": False,
        "biolomics": {"field": "Altitude of geographic origin", "type": "D"},
    },
    {
        "attribute": "collect.location",
        "label": "Geographic origin",
        "mandatory": True,
        "biolomics": {"field": "Geographic origin", "type": "E"},
    },
    {
        "attribute": "collect.habitat",
        "label": "Isolation habitat",
        "mandatory": False,
        "biolomics": {"field": "Isolation habitat", "type": "E"},
    },
    # {
    #     "attribute": "collect.habitat_ontobiotope",
    #     "label": "Ontobiotope term for the isolation habitat",
    #     "mandatory": False,
    #     "biolomics": {"field": "Ontobiotope term for the isolation habitat", "type": "E"},
    # },
    {
        "attribute": "collect.habitat_ontobiotope",
        "label": "Ontobiotope",
        "mandatory": False,
        "biolomics": {"field": "Ontobiotope", "type": "RLink"},
    },
    {
        "attribute": "genetics.gmo", "label": "GMO", "mandatory": False,
        "biolomics": {"field": "GMO", "type": "V"},
    },
    {
        "attribute": "genetics.gmo_construction",
        "label": "GMO construction information",
        "mandatory": False,
        "biolomics": {"field": "GMO construction information", "type": "E"},
    },
    {
        "attribute": "genetics.mutant_info",
        "label": "Mutant information",
        "mandatory": False,
        "biolomics": {"field": "Mutant information", "type": "E"},
    },
    {
        "attribute": "genetics.genotype",
        "label": "Genotype",
        "mandatory": False,
        "biolomics": {"field": "Genotype", "type": "E"},
    },
    {
        "attribute": "genetics.sexual_state",
        "label": "Sexual state",
        "mandatory": False,
        "biolomics": {"field": "Sexual state", "type": "E"},
    },
    {
        "attribute": "genetics.ploidy",
        "label": "Ploidy",
        "mandatory": False,
        "biolomics": {"field": "Ploidy", "type": "T"},
    },
    {
        "attribute": "genetics.plasmids",
        "label": "Plasmids",
        "mandatory": False,
        "biolomics": {"field": "Plasmids", "type": "E"},
    },
    {
        "attribute": "genetics.plasmids_in_collections",
        "label": "Plasmids collections fields",
        "mandatory": False,
        "biolomics": {"field": "Plasmids collections fields", "type": "E"},
    },
    {
        "attribute": "publications",
        "label": "Literature",
        "mandatory": False,
        "biolomics": {"field": "Literature", "type": "RLink"},
    },
    {
        "attribute": "pathogenicity",
        "label": "Pathogenicity",
        "mandatory": False,
        "biolomics": {"field": "Pathogenicity", "type": "E"},
    },
    {
        "attribute": "enzyme_production",
        "label": "Enzyme production",
        "mandatory": False,
        "biolomics": {"field": "Enzyme production", "type": "E"},
    },
    {
        "attribute": "production_of_metabolites",
        "label": "Production of metabolites",
        "mandatory": False,
        "biolomics": {"field": "Metabolites production", "type": "E"},
    },
    {
        "attribute": "applications",
        "label": "Applications",
        "mandatory": False,
        "biolomics": {"field": "Applications", "type": "E"},
    },
    {
        "attribute": "remarks", "label": "Remarks", "mandatory": False,
        "biolomics": {"field": "Remarks", "type": "E"},
    },
    {
        "attribute": "literature_linked_to_the_sequence_genome",
        "label": "Literature linked to the sequence/genome",
        "mandatory": False,
        # "biolomics": {"field": "MTA files URL", "type": "U"},
    },
]
# Publication attribute -> Biolomics bibliography field mapping.
# 'pub_id' intentionally maps to no remote field.
# BUG FIXES: removed two duplicate 'last_page' entries that mapped to empty
# field names, and renamed the misspelled 'volumen' attribute to 'volume'
# (the Publication entity and PUB_MAPPING both use 'volume', so the typo
# meant Volume was never exported).
PUB_MIRRI_FIELDS = [
    {
        "attribute": "pub_id", "mandatory": False,
        "biolomics": {"field": "", "type": "E"},
    },
    {
        "attribute": "pubmed_id", "mandatory": False,
        "biolomics": {"field": "PubMed ID", "type": "E"},
    },
    {
        "attribute": "doi", "mandatory": False,
        "biolomics": {"field": "DOI number", "type": "E"},
    },
    {
        "attribute": "title", "mandatory": False,
        "biolomics": {"field": "Title", "type": "E"},
    },
    {
        "attribute": "authors", "mandatory": False,
        "biolomics": {"field": "Authors", "type": "E"},
    },
    {
        "attribute": "journal", "mandatory": False,
        "biolomics": {"field": "Journal", "type": "E"},
    },
    {
        "attribute": "volume", "mandatory": False,
        "biolomics": {"field": "Volume", "type": "E"},
    },
    {
        "attribute": "issue", "mandatory": False,
        "biolomics": {"field": "Issue", "type": "E"},
    },
    {
        "attribute": "first_page", "mandatory": False,
        "biolomics": {"field": "Page from", "type": "E"},
    },
    {
        "attribute": "last_page", "mandatory": False,
        "biolomics": {"field": "Page to", "type": "E"},
    },
    {
        "attribute": "book_title", "label": "", "mandatory": False,
        "biolomics": {"field": "Book title", "type": "E"},
    },
    {
        "attribute": "publisher", "label": "", "mandatory": False,
        "biolomics": {"field": "Publisher", "type": "E"},
    },
    {
        "attribute": "editor", "label": "", "mandatory": False,
        "biolomics": {"field": "Editor(s)", "type": "E"},
    },
]

3603
mirri/data/ontobiotopes.csv Normal file

File diff suppressed because it is too large Load Diff

View File

View File

@ -0,0 +1,45 @@
class FrozenClass(object):
    """Base class whose instances can be frozen against new attributes.

    After ``_freeze()`` is called, assigning a name that is not already an
    attribute raises TypeError; existing attributes remain writable.
    """

    __isfrozen = False

    def __setattr__(self, key, value):
        # Allow the assignment while unfrozen, or when the attribute
        # already exists; otherwise reject it.
        if not self.__isfrozen or hasattr(self, key):
            object.__setattr__(self, key, value)
            return
        msg = f"Can not add {key} to {self.__class__.__name__}. It is not one of its attributes"
        raise TypeError(msg)

    def _freeze(self):
        # From this point on __setattr__ rejects unknown attribute names.
        self.__isfrozen = True
class _FieldBasedClass(FrozenClass):
    """Frozen value object whose attributes are described by ``_fields``.

    Each ``_fields`` entry maps an ``attribute`` name to the data ``label``
    under which the value is read from / written to plain dicts.
    """

    _fields = []

    def __init__(self, data=None, freeze=True):
        self._data = {}
        data = {} if data is None else data
        for field_def in self._fields:
            setattr(self, field_def["attribute"],
                    data.get(field_def["label"], None))
        if freeze:
            self._freeze()

    def __eq__(self, o: object) -> bool:
        # Equal when every declared attribute matches the other object's.
        return all(
            getattr(self, field_def["attribute"], None) ==
            getattr(o, field_def["attribute"], None)
            for field_def in self._fields)

    def __bool__(self):
        return bool(self.dict())

    def dict(self):
        """Return a label -> value mapping, omitting None values."""
        result = {}
        for field_def in self._fields:
            value = getattr(self, field_def["attribute"])
            if value is not None:
                result[field_def["label"]] = value
        return result

View File

@ -0,0 +1,87 @@
from calendar import monthrange
from collections import OrderedDict
from copy import copy
from datetime import date
class DateRange:
    """A possibly partial date: month and day may be missing.

    A partial date is represented internally as the range of real dates it
    covers, e.g. ``year=2020, month=2`` covers 2020-02-01 .. 2020-02-29.
    NOTE(review): a year is effectively required to build a range; month/day
    without a year would fail inside ``_create_range`` — confirm callers
    never do that.
    """

    def __init__(self, year=None, month=None, day=None):
        self._year = year
        if month is not None and (month < 1 or month > 12):
            raise ValueError("Month must be between 1 and 12")
        self._month = month
        if day is not None and (day < 1 or day > 31):
            raise ValueError("Day must be between 1 and 31")
        self._day = day
        self._start = None
        self._end = None
        if year or month or day:
            self._create_range()

    def __str__(self):
        _strdate = self.strfdate
        if _strdate is None:
            return ""
        return _strdate

    def __bool__(self):
        return bool(self._year or self._month or self._day)

    def _create_range(self):
        # Expand the known parts into a [start, end] pair of real dates.
        year = self._year
        month = self._month
        day = self._day
        if year and month and day:
            start_date = date(year=year, month=month, day=day)
            end_date = date(year=year, month=month, day=day)
        elif month is None:
            start_date = date(year=year, month=1, day=1)
            end_date = date(year=year, month=12, day=31)
        elif day is None:
            # monthrange gives (weekday of day 1, number of days in month).
            month_last_day = monthrange(year, month)[1]
            start_date = date(year=year, month=month, day=1)
            end_date = date(year=year, month=month, day=month_last_day)
        self._start = start_date
        self._end = end_date

    def strpdate(self, date_str: str):
        """Parse a YYYY[MM[DD]] string ('-' or '/' separators allowed).

        Returns self, so it can be chained: ``DateRange().strpdate(...)``.
        Raises ValueError for malformed input.
        """
        date_str = str(date_str)
        orig_date = copy(date_str)
        date_str = date_str.replace("/", "").replace("-", "")
        if len(date_str) > 8:
            msg = f"Malformed date, more characters than expected: {orig_date}"
            raise ValueError(msg)
        if len(date_str) < 4:
            # Bug fix: the original fell through with `year` undefined and
            # crashed with a NameError; a 4-digit year is always required.
            msg = f"Malformed date, at least a 4 digit year is required: {orig_date}"
            raise ValueError(msg)
        month = None
        day = None
        year = int(date_str[:4])
        if len(date_str) >= 6:
            month = int(date_str[4:6])
            if month < 1 or month > 12:
                raise ValueError("Month must be between 1 and 12")
        if len(date_str) >= 8:
            day = int(date_str[6:8])
            if day < 1 or day > 31:
                raise ValueError("Day must be between 1 and 31")
        self._year = year
        self._month = month
        self._day = day
        self._create_range()
        return self

    @property
    def strfdate(self):
        """Partial-date string, missing parts dashed, e.g. '202002--'."""
        year = "----" if self._year is None else f"{self._start.year:04}"
        month = "--" if self._month is None else f"{self._start.month:02}"
        day = "--" if self._day is None else f"{self._start.day:02}"
        _date = str(f"{year}{month}{day}")
        if _date == "--------":
            return None
        return _date

    @property
    def range(self):
        """OrderedDict with the 'start' and 'end' datetime.date bounds."""
        return OrderedDict([("start", self._start), ("end", self._end)])

View File

@ -0,0 +1,47 @@
class GrowthMedium:
    """Container for a culture growth medium.

    Only the names in ``fields`` may be set or read; everything is stored in
    a private dict so ``dict()`` exposes exactly the assigned values.
    """

    fields = ['record_id', 'record_name', 'acronym', 'full_description',
              'ingredients', 'description', 'other_name', 'ph',
              'sterilization_conditions']

    def __init__(self, **kwargs):
        self._data = {}
        for field in self.fields:
            # Bug fix: the original looked up the literal key 'field'
            # (kwargs['field']) instead of the field name, so constructor
            # keyword values were lost or raised KeyError.
            if field in kwargs and kwargs[field] is not None:
                setattr(self, field, kwargs[field])

    def __setattr__(self, attr, value):
        if attr == '_data':
            super().__setattr__(attr, value)
            return
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        self._data[attr] = value

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails (e.g. '_data'
        # before __init__ has run). Bug fix: the original returned the
        # `super` builtin here instead of signalling the missing attribute.
        if attr == '_data':
            raise AttributeError(attr)
        if attr not in self.fields:
            raise TypeError(f'{attr} not an allowed attribute')
        return self._data.get(attr, None)

    def dict(self):
        """Return the underlying field/value mapping (not a copy)."""
        return self._data

    def update(self, growth_media, include_fields=None):
        """Copy differing, non-None values of *include_fields* from *growth_media*.

        NOTE(review): when include_fields is None nothing is updated —
        confirm that is the intended behaviour (it matches the original).
        """
        for field in self.fields:
            if include_fields and field in include_fields:
                new_value = getattr(growth_media, field, None)
                actual_value = getattr(self, field, None)
                if new_value is not None and new_value != actual_value:
                    setattr(self, field, new_value)

    def is_equal(self, other, exclude_fields=None):
        """True when every non-None field of self matches *other*.

        Fields listed in *exclude_fields* are ignored.
        """
        # Bug fix: mutable default argument replaced with None sentinel.
        if exclude_fields is None:
            exclude_fields = []
        for field in self.fields:
            if field in exclude_fields:
                continue
            value_of_other = getattr(other, field, None)
            value_of_self = getattr(self, field, None)
            if value_of_self is not None and value_of_self != value_of_other:
                return False
        return True

170
mirri/entities/location.py Normal file
View File

@ -0,0 +1,170 @@
from __future__ import annotations
import hashlib
from typing import Union
from mirri.entities._private_classes import _FieldBasedClass
from mirri.settings import (
ALTITUDE,
COORD_SPATIAL_REFERENCE,
COORDUNCERTAINTY,
COUNTRY,
GEOREF_METHOD,
ISLAND,
LATITUDE,
LONGITUDE,
MUNICIPALITY,
OTHER,
PROVINCE,
SITE,
STATE,
)
import pycountry
class Location(_FieldBasedClass):
    """Geographic origin of a sample, from country down to coordinates."""

    _fields = [
        {"attribute": "country", "label": COUNTRY},
        {"attribute": "state", "label": STATE},
        {"attribute": "province", "label": PROVINCE},
        {"attribute": "municipality", "label": MUNICIPALITY},
        {"attribute": "site", "label": SITE},
        {"attribute": "other", "label": OTHER},
        {"attribute": "island", "label": ISLAND},
        {"attribute": "longitude", "label": LONGITUDE},
        {"attribute": "latitude", "label": LATITUDE},
        {"attribute": "altitude", "label": ALTITUDE},
        {"attribute": "coord_spatial_reference", "label": COORD_SPATIAL_REFERENCE},
        {"attribute": "coord_uncertainty", "label": COORDUNCERTAINTY},
        {"attribute": "georef_method", "label": GEOREF_METHOD},
    ]

    def __str__(self):
        _site = []
        if self.country:
            _site.append(self.country)
        if self.province:
            _site.append(self.province)
        if self.site:
            _site.append(self.site)
        if self.municipality:
            _site.append(self.municipality)
        return ": ".join(_site)

    def __hash__(self):
        # Hash over all field values so only identical locations collide.
        # Bug fix: the original called getattr(self, field, None) with the
        # whole field *dict* as the name, which always returned the default,
        # so every Location instance hashed to the same value.
        hash_str = ''
        for field in self._fields:
            hash_str += str(getattr(self, field["attribute"], None))
        return int(hashlib.sha1(hash_str.encode("utf-8")).hexdigest(), 16) % (10 ** 8)

    @property
    def country(self) -> Union[str, None]:
        """ISO alpha-3 country code; 'INW' is accepted (international waters)."""
        return self._data.get(COUNTRY, None)

    @country.setter
    def country(self, code3: str):
        if code3 is not None:
            _country = pycountry.countries.get(alpha_3=code3)
            if _country is None:
                _country = pycountry.historic_countries.get(alpha_3=code3)
            if _country is None and code3 != 'INW':
                raise ValueError(f'{code3}, not a valid 3 letter country name')
            self._data[COUNTRY] = code3

    @property
    def province(self) -> Union[str, None]:
        return self._data.get(PROVINCE, None)

    @province.setter
    def province(self, code3: str):
        self._data[PROVINCE] = code3

    @property
    def municipality(self) -> Union[str, None]:
        return self._data.get(MUNICIPALITY, None)

    @municipality.setter
    def municipality(self, name: str):
        self._data[MUNICIPALITY] = name

    @property
    def site(self) -> Union[str, None]:
        return self._data.get(SITE, None)

    @site.setter
    def site(self, name: str):
        self._data[SITE] = name

    @property
    def latitude(self) -> Union[float, None]:
        return self._data.get(LATITUDE, None)

    @latitude.setter
    def latitude(self, latitude: float):
        self._data[LATITUDE] = latitude

    @property
    def longitude(self) -> Union[float, None]:
        return self._data.get(LONGITUDE, None)

    @longitude.setter
    def longitude(self, longitude: float):
        self._data[LONGITUDE] = longitude

    @property
    def altitude(self) -> Union[int, float, None]:
        return self._data.get(ALTITUDE, None)

    @altitude.setter
    def altitude(self, altitude: Union[int, float]):
        self._data[ALTITUDE] = altitude

    @property
    def georef_method(self) -> Union[str, None]:
        return self._data.get(GEOREF_METHOD, None)

    @georef_method.setter
    def georef_method(self, georef_method: str):
        self._data[GEOREF_METHOD] = georef_method

    @property
    def coord_uncertainty(self) -> Union[str, None]:
        return self._data.get(COORDUNCERTAINTY, None)

    @coord_uncertainty.setter
    def coord_uncertainty(self, coord_uncertainty: str):
        self._data[COORDUNCERTAINTY] = coord_uncertainty

    @property
    def coord_spatial_reference(self) -> Union[str, None]:
        return self._data.get(COORD_SPATIAL_REFERENCE, None)

    @coord_spatial_reference.setter
    def coord_spatial_reference(self, coord_spatial_reference: str):
        self._data[COORD_SPATIAL_REFERENCE] = coord_spatial_reference

    @property
    def state(self) -> Union[str, None]:
        return self._data.get(STATE, None)

    @state.setter
    def state(self, state):
        self._data[STATE] = state

    @property
    def island(self) -> Union[str, None]:
        return self._data.get(ISLAND, None)

    @island.setter
    def island(self, island):
        self._data[ISLAND] = island

    @property
    def other(self) -> Union[str, None]:
        return self._data.get(OTHER, None)

    @other.setter
    def other(self, other):
        self._data[OTHER] = other

View File

@ -0,0 +1,202 @@
from mirri.settings import (BOOK_EDITOR, BOOK_PUBLISHER, BOOK_TITLE,
PUB_AUTHORS, PUB_DOI, PUB_FIRST_PAGE, PUB_ID,
PUB_ISSUE, PUB_JOURNAL, PUB_LAST_PAGE,
PUB_PUBMED_ID, PUB_TITLE, PUB_VOLUME)
# Maybe we could implement some crossref calls to fill all field data
# and get DOI where ever is possible
# Keys of the Biolomics record metadata stored in Publication._data.
RECORD_ID = 'RecordId'
RECORD_NAME = 'RecordName'
class Publication:
    """Bibliographic reference (journal article or book) for a strain.

    Values live in a private dict keyed by the field constants; every setter
    ignores None, so the dict only ever holds real values — which is what
    ``__bool__`` relies on.
    """

    def __init__(self, data=None):
        self._data = {}
        if data:
            self.record_id = data.get('RecordId', None)
            self.record_name = data.get('RecordName', None)
            self.pubmed_id = data.get(PUB_PUBMED_ID, None)
            self.doi = data.get(PUB_DOI, None)
            self.title = data.get(PUB_TITLE, None)
            self.authors = data.get(PUB_AUTHORS, None)
            self.journal = data.get(PUB_JOURNAL, None)
            self.volume = data.get(PUB_VOLUME, None)
            self.issue = data.get(PUB_ISSUE, None)
            self.first_page = data.get(PUB_FIRST_PAGE, None)
            self.last_page = data.get(PUB_LAST_PAGE, None)
            # Bug fix: the original assigned to `self.editor`, a plain
            # instance attribute that bypassed the `editors` property and so
            # never stored the value in _data (dict()/__bool__ missed it).
            self.editors = data.get(BOOK_EDITOR, None)
            self.publisher = data.get(BOOK_PUBLISHER, None)
            self.book_title = data.get(BOOK_TITLE, None)
            self.isbn = data.get('ISBN', None)
            self.issn = data.get('ISSN', None)
            self.year = data.get('Year', None)

    def __bool__(self):
        return bool(self._data)

    def dict(self):
        """Return the underlying field/value mapping (not a copy)."""
        return self._data

    @property
    def id(self) -> int:
        return self._data.get(PUB_ID, None)

    @id.setter
    def id(self, value: int):
        if value is not None:
            self._data[PUB_ID] = value

    @property
    def record_id(self) -> int:
        return self._data.get(RECORD_ID, None)

    @record_id.setter
    def record_id(self, value: int):
        if value is not None:
            self._data[RECORD_ID] = value

    @property
    def record_name(self) -> str:
        return self._data.get(RECORD_NAME, None)

    @record_name.setter
    def record_name(self, value: str):
        if value is not None:
            self._data[RECORD_NAME] = value

    @property
    def pubmed_id(self):
        return self._data.get(PUB_PUBMED_ID, None)

    @pubmed_id.setter
    def pubmed_id(self, value: str):
        if value is not None:
            self._data[PUB_PUBMED_ID] = value

    @property
    def isbn(self):
        return self._data.get('ISBN', None)

    @isbn.setter
    def isbn(self, value: str):
        if value is not None:
            self._data['ISBN'] = value

    @property
    def issn(self):
        return self._data.get('ISSN', None)

    @issn.setter
    def issn(self, value: str):
        if value is not None:
            self._data['ISSN'] = value

    @property
    def doi(self):
        return self._data.get(PUB_DOI, None)

    @doi.setter
    def doi(self, value: str):
        if value is not None:
            self._data[PUB_DOI] = value

    @property
    def title(self):
        return self._data.get(PUB_TITLE, None)

    @title.setter
    def title(self, value: str):
        if value is not None:
            self._data[PUB_TITLE] = value
            # The Biolomics record name mirrors the title.
            self._data[RECORD_NAME] = value

    @property
    def authors(self):
        return self._data.get(PUB_AUTHORS, None)

    @authors.setter
    def authors(self, value: str):
        if value is not None:
            self._data[PUB_AUTHORS] = value

    @property
    def journal(self):
        return self._data.get(PUB_JOURNAL, None)

    @journal.setter
    def journal(self, value: str):
        if value is not None:
            self._data[PUB_JOURNAL] = value

    @property
    def volume(self):
        return self._data.get(PUB_VOLUME, None)

    @volume.setter
    def volume(self, value: str):
        if value is not None:
            self._data[PUB_VOLUME] = value

    @property
    def issue(self):
        return self._data.get(PUB_ISSUE, None)

    @issue.setter
    def issue(self, value: str):
        if value is not None:
            self._data[PUB_ISSUE] = value

    @property
    def first_page(self):
        return self._data.get(PUB_FIRST_PAGE, None)

    @first_page.setter
    def first_page(self, value: str):
        if value is not None:
            self._data[PUB_FIRST_PAGE] = value

    @property
    def last_page(self):
        return self._data.get(PUB_LAST_PAGE, None)

    @last_page.setter
    def last_page(self, value: str):
        if value is not None:
            self._data[PUB_LAST_PAGE] = value

    @property
    def book_title(self):
        return self._data.get(BOOK_TITLE, None)

    @book_title.setter
    def book_title(self, value: str):
        if value is not None:
            self._data[BOOK_TITLE] = value

    @property
    def editors(self):
        return self._data.get(BOOK_EDITOR, None)

    @editors.setter
    def editors(self, value: str):
        if value is not None:
            self._data[BOOK_EDITOR] = value

    @property
    def editor(self):
        # Backwards-compatible alias: some field maps use the singular name.
        return self._data.get(BOOK_EDITOR, None)

    @editor.setter
    def editor(self, value: str):
        if value is not None:
            self._data[BOOK_EDITOR] = value

    @property
    def publisher(self):
        return self._data.get(BOOK_PUBLISHER, None)

    @publisher.setter
    def publisher(self, value: str):
        if value is not None:
            self._data[BOOK_PUBLISHER] = value

    @property
    def year(self) -> int:
        return self._data.get('Year', None)

    @year.setter
    def year(self, value: int):
        if value is not None:
            self._data['Year'] = value

View File

@ -0,0 +1,45 @@
from mirri.entities._private_classes import _FieldBasedClass
from mirri.settings import (
ALLOWED_MARKER_TYPES,
MARKER_INSDC,
MARKER_SEQ,
MARKER_TYPE)
from mirri import ValidationError
class GenomicSequence(_FieldBasedClass):
    """A genomic marker: its type, INSDC accession number and sequence."""

    _fields = [
        {"attribute": "marker_type", "label": MARKER_TYPE},
        {"attribute": "marker_id", "label": MARKER_INSDC},
        {"attribute": "marker_seq", "label": MARKER_SEQ},
    ]

    @property
    def marker_type(self):
        return self._data.get(MARKER_TYPE, None)

    @marker_type.setter
    def marker_type(self, value: str):
        if value is not None:
            # Bug fix: the original joined all acronyms into one string and
            # used a substring test (`value not in types`), so any fragment
            # of an acronym (e.g. "16S") was wrongly accepted.
            allowed = [m["acronym"] for m in ALLOWED_MARKER_TYPES]
            if value not in allowed:
                types = " ".join(allowed)
                msg = f"{value} not in allowed marker types: {types}"
                raise ValidationError(msg)
            self._data[MARKER_TYPE] = value

    @property
    def marker_id(self) -> str:
        return self._data.get(MARKER_INSDC, None)

    @marker_id.setter
    def marker_id(self, value: str):
        self._data[MARKER_INSDC] = value

    @property
    def marker_seq(self) -> str:
        return self._data.get(MARKER_SEQ, None)

    @marker_seq.setter
    def marker_seq(self, value: str):
        self._data[MARKER_SEQ] = value

1243
mirri/entities/strain.py Normal file

File diff suppressed because it is too large Load Diff

0
mirri/io/__init__.py Normal file
View File

View File

79
mirri/io/parsers/excel.py Normal file
View File

@ -0,0 +1,79 @@
from io import BytesIO
from openpyxl import load_workbook
def excel_dict_reader(fhand, sheet_name, mandatory_column_name=None):
    """Open the workbook behind *fhand* and yield row dicts of *sheet_name*.

    The file handle is rewound and fully read into memory before parsing.
    """
    fhand.seek(0)
    workbook = load_workbook(filename=BytesIO(fhand.read()),
                             data_only=True, read_only=True)
    return workbook_sheet_reader(workbook, sheet_name,
                                 mandatory_column_name=mandatory_column_name)
def is_none(value):
    """Return True when *value* is exactly None (not merely falsy)."""
    if value is None:
        return True
    return False
def workbook_sheet_reader(workbook, sheet_name, mandatory_column_name=None,
                          allowed_empty_line_slots=5):
    """Yield one dict per data row of *sheet_name*, keyed by the header row.

    The first row is taken as the header. Rows whose cells are all falsy are
    skipped, and after *allowed_empty_line_slots* consecutive empty rows the
    sheet is considered finished and iteration stops early. If
    *mandatory_column_name* is given, rows with an empty value in that
    column are skipped as well.

    Raises ValueError when the sheet is missing from the workbook.
    """
    try:
        sheet = workbook[sheet_name]
    except KeyError as error:
        raise ValueError(f"The '{sheet_name}' sheet is missing.") from error
    first = True
    header = []
    empty_lines = 0
    for row in sheet.rows:
        values = []
        for cell in row:
            # 's' is openpyxl's data-type code for string cells; only those
            # are stripped — numbers, dates, etc. pass through unchanged.
            if cell.value is not None and cell.data_type == 's':
                value = str(cell.value).strip()
            else:
                value = cell.value
            values.append(value)
        # values = [cell.value.strip() for cell in row]
        if first:
            header = values
            first = False
            continue
        if not any(values):
            empty_lines += 1
            if empty_lines >= allowed_empty_line_slots:
                break
            continue
        empty_lines = 0
        data = dict(zip(header, values))
        if mandatory_column_name is not None and not data[mandatory_column_name]:
            # msg = f"Exiting before end of sheet {sheet_name} ends.\n"
            # msg += f"Mandatory column ({mandatory_column_name}) empty. \n"
            # msg += "Check file for empty lines"
            # print(msg)
            continue
        yield data
def get_all_cell_data_from_sheet(workbook, sheet_name, allowed_empty_line_slots=5):
    """Return every cell value of *sheet_name* flattened into one list.

    String cells are stripped; other cell types are kept as-is. Rows whose
    cells are all falsy are skipped, and after *allowed_empty_line_slots*
    consecutive empty rows the sheet is considered finished.

    Raises ValueError when the sheet is missing from the workbook.
    """
    try:
        sheet = workbook[sheet_name]
    except KeyError as error:
        raise ValueError(f"The '{sheet_name}' sheet is missing.") from error
    empty_lines = 0
    all_values = []
    for row in sheet.rows:
        values = []
        for cell in row:
            # 's' is openpyxl's data-type code for string cells.
            if cell.value is not None and cell.data_type == 's':
                value = str(cell.value).strip()
            else:
                value = cell.value
            values.append(value)
        if not any(values):
            empty_lines += 1
            if empty_lines >= allowed_empty_line_slots:
                break
            continue
        empty_lines = 0
        all_values.extend(values)
    return all_values

View File

@ -0,0 +1,276 @@
import re
from datetime import date
from io import BytesIO
import pycountry
from openpyxl import load_workbook
from mirri import rsetattr, ValidationError
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.growth_medium import GrowthMedium
from mirri.io.parsers.excel import workbook_sheet_reader
from mirri.entities.publication import Publication
from mirri.entities.date_range import DateRange
from mirri.entities.strain import OrganismType, StrainId, add_taxon_to_strain
from mirri.settings import (COMMERCIAL_USE_WITH_AGREEMENT, GENOMIC_INFO,
GROWTH_MEDIA, LITERATURE_SHEET, LOCATIONS,
MIRRI_FIELDS, NAGOYA_DOCS_AVAILABLE, NAGOYA_NO_RESTRICTIONS,
NAGOYA_PROBABLY_SCOPE, NO_RESTRICTION,
ONLY_RESEARCH, ONTOBIOTOPE,
PUBLICATION_FIELDS, STRAINS, SUBTAXAS)
from mirri.utils import get_country_from_name
# Numeric codes of the excel "Restriction on use" column mapped to the
# internal restriction constants.
RESTRICTION_USE_TRANSLATOR = {
    1: NO_RESTRICTION,
    2: ONLY_RESEARCH,
    3: COMMERCIAL_USE_WITH_AGREEMENT,
}
# Numeric codes of the excel "Nagoya protocol" column.
NAGOYA_TRANSLATOR = {
    1: NAGOYA_NO_RESTRICTIONS,
    2: NAGOYA_DOCS_AVAILABLE,
    3: NAGOYA_PROBABLY_SCOPE,
}
# Excel boolean convention: 1 means "no" (False), 2 means "yes" (True).
TRUEFALSE_TRANSLATOR = {
    1: False,
    2: True
}
def parse_mirri_excel(fhand, version="20200601"):
    """Parse a MIRRI excel file into strains and growth media.

    Only the 20200601 layout is supported; any other *version* raises
    NotImplementedError.
    """
    if version != "20200601":
        raise NotImplementedError("Only version 20200601 is implemented")
    return _parse_mirri_v20200601(fhand)
def _parse_mirri_v20200601(fhand):
    """Parse the 20200601 MIRRI excel layout.

    Returns a dict with a lazy 'strains' generator and the eagerly parsed
    'growth_media' list.
    """
    fhand.seek(0)
    file_content = BytesIO(fhand.read())
    workbook = load_workbook(filename=file_content, read_only=True,
                             data_only=True)
    locations = workbook_sheet_reader(workbook, LOCATIONS)
    ontobiotopes = workbook_sheet_reader(workbook, ONTOBIOTOPE)
    growth_media = list(parse_growth_media(workbook))
    markers = workbook_sheet_reader(workbook, GENOMIC_INFO)
    publications = list(parse_publications(workbook))
    strains = parse_strains(workbook, locations=locations,
                            growth_media=growth_media, markers=markers,
                            publications=publications,
                            ontobiotopes=ontobiotopes)
    return {"strains": strains, "growth_media": growth_media}
def index_list_by(list_, id_):
    """Index dict items by the stringified value found under key *id_*."""
    indexed = {}
    for item in list_:
        indexed[str(item[id_])] = item
    return indexed
def index_list_by_attr(list_, id_):
    """Index objects by the stringified value of attribute *id_*."""
    indexed = {}
    for item in list_:
        indexed[str(getattr(item, id_))] = item
    return indexed
def index_markers(markers):
    """Group genomic-marker rows by their 'Strain AN' accession number."""
    grouped = {}
    for marker in markers:
        grouped.setdefault(marker["Strain AN"], []).append(marker)
    return grouped
def remove_hard_lines(string=None):
    """Strip CRLF runs and tabs from *string* and trim it.

    None and the empty string both yield None.
    """
    if string is None or string == '':
        return None
    return re.sub(r'\r+\n+|\t+', '', string).strip()
def parse_growth_media(wb):
    """Yield one GrowthMedium per row of the growth-media sheet."""
    for row in workbook_sheet_reader(wb, GROWTH_MEDIA):
        medium = GrowthMedium()
        medium.acronym = str(row['Acronym'])
        medium.description = row['Description']
        medium.full_description = remove_hard_lines(
            row.get('Full description', None))
        yield medium
def parse_publications(wb):
    """Yield one Publication per row of the literature sheet.

    Only columns listed in PUBLICATION_FIELDS are read; falsy cells are
    ignored. A row with no recognized values still yields an (empty)
    Publication, matching the original behaviour.
    """
    # The unused `ids = []` accumulator of the original was removed.
    for row in workbook_sheet_reader(wb, LITERATURE_SHEET):
        pub = Publication()
        for pub_field in PUBLICATION_FIELDS:
            label = pub_field["label"]
            col_val = row.get(label, None)
            if col_val:
                attribute = pub_field["attribute"]
                setattr(pub, attribute, col_val)
        yield pub
def parse_strains(wb, locations, growth_media, markers, publications,
                  ontobiotopes):
    """Yield one StrainMirri per row of the 'Strains' sheet.

    The auxiliary sheets (locations, growth media, markers, publications and
    ontobiotopes) are indexed first and joined into each strain while the
    MIRRI_FIELDS columns are translated into strain attributes.

    Raises ValidationError for taxon names that do not follow the
    specification; unknown locations/ontobiotopes raise KeyError.
    """
    ontobiotopes_by_id = {str(ont["ID"]): ont['Name'] for ont in ontobiotopes}
    ontobiotopes_by_name = {v: k for k, v in ontobiotopes_by_id.items()}
    locations = index_list_by(locations, 'Locality')
    growth_media = index_list_by_attr(growth_media, 'acronym')
    publications = index_list_by_attr(publications, 'id')
    markers = index_markers(markers)
    for strain_row in workbook_sheet_reader(wb, STRAINS, "Accession number"):
        strain = StrainMirri()
        # Bug fix: keep the accession number at hand for error messages (the
        # original left strain_id as None until after the field loop, so the
        # taxon error always read 'Accession Number None').
        strain_id = strain_row.get("Accession number")
        label = None
        for field in MIRRI_FIELDS:
            label = field["label"]
            attribute = field["attribute"]
            value = strain_row[label]
            if value is None or value == '':
                continue
            if attribute == "id":
                collection, number = value.split(" ", 1)
                value = StrainId(collection=collection, number=number)
                rsetattr(strain, attribute, value)
            elif attribute == "restriction_on_use":
                rsetattr(strain, attribute, RESTRICTION_USE_TRANSLATOR[value])
            elif attribute == "nagoya_protocol":
                rsetattr(strain, attribute, NAGOYA_TRANSLATOR[value])
            elif attribute == "other_numbers":
                other_numbers = []
                for on in value.split(";"):
                    on = on.strip()
                    try:
                        collection, number = on.split(" ", 1)
                    except ValueError:
                        # No collection acronym: keep the bare number.
                        collection = None
                        number = on
                    _id = StrainId(collection=collection, number=number)
                    other_numbers.append(_id)
                rsetattr(strain, attribute, other_numbers)
            elif attribute == "taxonomy.taxon_name":
                try:
                    add_taxon_to_strain(strain, value)
                except ValueError:
                    msg = f"The '{label}' for strain with Accession Number {strain_id} is not according to the specification."
                    raise ValidationError(msg)
            elif attribute == "taxonomy.organism_type":
                value = [OrganismType(val.strip())
                         for val in str(value).split(";")]
                rsetattr(strain, attribute, value)
            elif attribute in ("deposit.date", "collect.date", "isolation.date",
                              "catalog_inclusion_date"):
                if isinstance(value, date):
                    value = DateRange(
                        year=value.year, month=value.month, day=value.day
                    )
                elif isinstance(value, str):
                    value = DateRange().strpdate(value)
                else:
                    raise NotImplementedError()
                rsetattr(strain, attribute, value)
            elif attribute == 'growth.recommended_temp':
                temps = value.split(';')
                if len(temps) == 1:
                    _min, _max = float(temps[0]), float(temps[0])
                else:
                    _min, _max = float(temps[0]), float(temps[1])
                rsetattr(strain, attribute, {'min': _min, 'max': _max})
            elif attribute == "growth.recommended_media":
                sep = "/"
                if ";" in value:
                    sep = ";"
                # Bug fix: use a dedicated local name instead of clobbering
                # the indexed growth_media mapping built above the loop.
                media_acronyms = [v.strip() for v in value.split(sep)]
                rsetattr(strain, attribute, media_acronyms)
            elif attribute == 'growth.tested_temp_range':
                if value:
                    min_, max_ = value.split(";")
                    value = {'min': float(min_), 'max': float(max_)}
                rsetattr(strain, attribute, value)
            elif attribute == "form_of_supply":
                rsetattr(strain, attribute, value.split(";"))
            elif attribute == "collect.location.coords":
                items = value.split(";")
                strain.collect.location.latitude = float(items[0])
                strain.collect.location.longitude = float(items[1])
                if len(items) > 2:
                    strain.collect.location.coord_uncertainty = items[2]
            elif attribute == "collect.location":
                location = locations[value]
                if 'Country' in location and location['Country']:
                    if location['Country'] == 'Unknown':
                        continue
                    country_3 = _get_country_alpha3(location['Country'])
                    strain.collect.location.country = country_3
                strain.collect.location.state = location["Region"]
                strain.collect.location.municipality = location["City"]
                strain.collect.location.site = location["Locality"]
            elif attribute in ("abs_related_files", "mta_files"):
                rsetattr(strain, attribute, value.split(";"))
            elif attribute in ("is_from_registered_collection",
                              "is_subject_to_quarantine", 'taxonomy.interspecific_hybrid',
                              "is_potentially_harmful", "genetics.gmo"):
                rsetattr(strain, attribute, TRUEFALSE_TRANSLATOR[value])
            elif attribute == "publications":
                value = str(value)
                pubs = []
                pub_ids = [v.strip() for v in str(value).split(";")]
                for pub_id in pub_ids:
                    pub = publications.get(pub_id, None)
                    if pub is None:
                        # Unknown id: treat it as a raw DOI or PubMed id.
                        pub = Publication()
                        if '/' in pub_id:
                            pub.doi = pub_id
                        else:
                            pub.pubmed_id = pub_id
                    pubs.append(pub)
                rsetattr(strain, attribute, pubs)
            elif attribute == 'ontobiotope':
                values = []
                for val in value.split(';'):
                    if val not in ontobiotopes_by_id:
                        # Not an id: translate the ontobiotope name to its id.
                        val = ontobiotopes_by_name[val]
                    values.append(val)
                # Bug fix: the original stored the raw cell string here,
                # discarding the normalized id list it had just built.
                rsetattr(strain, attribute, values)
            elif attribute == 'other_denominations':
                value = [v.strip() for v in value.split(';')]
                rsetattr(strain, attribute, value)
            elif attribute == 'genetics.plasmids':
                value = [v.strip() for v in value.split(';')]
                rsetattr(strain, attribute, value)
            else:
                rsetattr(strain, attribute, value)
        # add markers
        strain_id = strain.id.strain_id
        if strain_id in markers:
            for marker in markers[strain_id]:
                _marker = GenomicSequenceBiolomics()
                _marker.marker_id = marker["INSDC AN"]
                _marker.marker_type = marker["Marker"]
                _marker.marker_seq = marker["Sequence"]
                strain.genetics.markers.append(_marker)
        yield strain
def _get_country_alpha3(loc_country):
    """Return the ISO alpha-3 code for a country name or alpha-3 code.

    'INW' (international waters) is passed through unchanged. Lookup order:
    by name, then by current alpha-3 code, then by historic alpha-3 code.

    Raises ValueError when the country cannot be resolved.
    """
    if loc_country == 'INW':
        return loc_country
    country = get_country_from_name(loc_country)
    if not country:
        country = pycountry.countries.get(alpha_3=loc_country)
    if not country:
        country = pycountry.historic_countries.get(alpha_3=loc_country)
    if not country:
        # Bug fix: the original fell through and crashed with
        # AttributeError on None; raise a clear error instead.
        raise ValueError(f'Could not resolve country: {loc_country}')
    return country.alpha_3

View File

View File

@ -0,0 +1,305 @@
import csv
from copy import deepcopy
from openpyxl.workbook.workbook import Workbook
from mirri import rgetattr
from mirri.settings import GROWTH_MEDIA, MIRRI_FIELDS, DATA_DIR, PUBLICATION_FIELDS
from mirri.io.parsers.mirri_excel import NAGOYA_TRANSLATOR, RESTRICTION_USE_TRANSLATOR
# Sexual states pre-seeded into the "Sexual states" sheet; strain-specific
# states found while writing are added on top of these.
# NOTE(review): "Mata", "MT+" and "MT-" appear twice — harmless because the
# writer converts this list to a set, but it looks unintended.
INITIAL_SEXUAL_STATES = [
    "Mata",
    "Matalpha",
    "Mata/Matalpha",
    "Mata",
    "Matb",
    "Mata/Matb",
    "MTLa",
    "MTLalpha",
    "MTLa/MTLalpha",
    "MAT1-1",
    "MAT1-2",
    "MAT1",
    "MAT2",
    "MT+",
    "MT-",
    "MT+",
    "MT-",
    "H+",
    "H-",
]
# Header definition for the "Markers" sheet.
MARKER_FIELDS = [
    {"attribute": "acronym", "label": "Acronym", "mandatory": True},
    {"attribute": "marker", "label": "Marker", "mandatory": True},
]
# Marker acronyms and names allowed by the MIRRI specification.
MARKER_DATA = [
    {"acronym": "16S rRNA", "marker": "16S rRNA"},
    {"acronym": "ACT", "marker": "Actin"},
    {"acronym": "CaM", "marker": "Calmodulin"},
    {"acronym": "EF-1α", "marker": "elongation factor 1-alpha (EF-1α)"},
    {"acronym": "ITS", "marker": "nuclear ribosomal Internal Transcribed Spacer (ITS)"},
    {"acronym": "LSU", "marker": "nuclear ribosomal Large SubUnit (LSU)"},
    {"acronym": "RPB1", "marker": "Ribosomal RNA-coding genes RPB1"},
    {"acronym": "RPB2", "marker": "Ribosomal RNA-coding genes RPB2"},
    {"acronym": "TUBB", "marker": "β-Tubulin"},
]
# Reverse maps: internal constants back to the numeric excel codes.
REV_RESTRICTION_USE_TRANSLATOR = {v: k for k, v in RESTRICTION_USE_TRANSLATOR.items()}
REV_NAGOYA_TRANSLATOR = {v: k for k, v in NAGOYA_TRANSLATOR.items()}
# Column headers of the "Literature" sheet.
PUB_HEADERS = [pb["label"] for pb in PUBLICATION_FIELDS]
def write_mirri_excel(path, strains, growth_media, version):
    """Write *strains* and *growth_media* to *path* in the given layout.

    NOTE(review): an unsupported *version* is silently ignored (nothing is
    written), matching the original behaviour — confirm that is intended.
    """
    if version != "20200601":
        return
    _write_mirri_excel_20200601(path, strains, growth_media)
def _write_mirri_excel_20200601(path, strains, growth_media):
    """Write strains plus companion sheets in the 20200601 excel layout.

    The strain rows are generated first because walking them also collects
    the indexed data (locations, publications, sexual states and genomic
    markers) that the other sheets are built from.
    """
    wb = Workbook()
    write_markers_sheet(wb)
    ontobiotope_path = DATA_DIR / "ontobiotopes.csv"
    write_ontobiotopes(wb, ontobiotope_path)
    write_growth_media(wb, growth_media)
    growth_media_indexes = [str(gm.acronym) for gm in growth_media]
    # Containers filled as a side effect of _deserialize_strains.
    locations = {}
    publications = {}
    sexual_states = set(deepcopy(INITIAL_SEXUAL_STATES))
    genomic_markers = {}
    strains_data = _deserialize_strains(strains, locations, growth_media_indexes,
                                        publications, sexual_states, genomic_markers)
    strains_data = list(strains_data)
    # write strain to generate indexed data
    strain_sheet = wb.create_sheet("Strains")
    strain_sheet.append([field["label"] for field in MIRRI_FIELDS])
    for strain_row in strains_data:
        strain_sheet.append(strain_row)
    redimension_cell_width(strain_sheet)
    # write locations
    loc_sheet = wb.create_sheet("Geographic origin")
    loc_sheet.append(["ID", "Country", "Region", "City", "Locality"])
    for index, loc_index in enumerate(locations.keys()):
        location = locations[loc_index]
        row = [index, location.country, location.state, location.municipality,
               loc_index]
        loc_sheet.append(row)
    redimension_cell_width(loc_sheet)
    # write publications
    pub_sheet = wb.create_sheet("Literature")
    pub_sheet.append(PUB_HEADERS)
    for publication in publications.values():
        row = []
        for pub_field in PUBLICATION_FIELDS:
            # if pub_field['attribute'] == 'id':
            #     value = index
            value = getattr(publication, pub_field['attribute'], None)
            row.append(value)
        pub_sheet.append(row)
    redimension_cell_width(pub_sheet)
    # write sexual states
    sex_sheet = wb.create_sheet("Sexual states")
    for sex_state in sorted(list(sexual_states)):
        sex_sheet.append([sex_state])
    redimension_cell_width(sex_sheet)
    # write genetic markers
    markers_sheet = wb.create_sheet("Genomic information")
    markers_sheet.append(['Strain AN', 'Marker', 'INSDC AN', 'Sequence'])
    for strain_id, markers in genomic_markers.items():
        for marker in markers:
            row = [strain_id, marker.marker_type, marker.marker_id, marker.marker_seq]
            markers_sheet.append(row)
    redimension_cell_width(markers_sheet)
    # Drop the default empty sheet that Workbook() creates.
    del wb["Sheet"]
    wb.save(str(path))
def _deserialize_strains(strains, locations, growth_media_indexes,
                         publications, sexual_states, genomic_markers):
    """Yield one excel row (list of cell values) per strain.

    Side effects: fills *locations*, *publications*, *sexual_states* and
    *genomic_markers* with the auxiliary data found while walking the
    strains, so the caller can write the companion sheets afterwards.

    Raises ValueError when a strain references a growth medium that is not
    among *growth_media_indexes*.
    """
    for strain in strains:
        strain_row = []
        for field in MIRRI_FIELDS:
            attribute = field["attribute"]
            if attribute == "id":
                value = strain.id.strain_id
            elif attribute == "restriction_on_use":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = REV_RESTRICTION_USE_TRANSLATOR[value]
            elif attribute == "nagoya_protocol":
                value = rgetattr(strain, attribute)
                if value:
                    value = REV_NAGOYA_TRANSLATOR[value]
            elif attribute == "other_numbers":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = [f"{on.collection} {on.number}" for on in value]
                    value = "; ".join(value)
            elif attribute == 'other_denominations':
                od = strain.other_denominations
                value = "; ".join(od) if od else None
            elif attribute in (
                "is_from_registered_collection",
                "is_subject_to_quarantine",
                "is_potentially_harmful",
                "genetics.gmo",
                "taxonomy.interspecific_hybrid"
            ):
                # Excel boolean convention: 2 = yes, 1 = no.
                value = rgetattr(strain, attribute)
                if value is True:
                    value = 2
                elif value is False:
                    value = 1
                else:
                    value = None
            elif attribute == "taxonomy.taxon_name":
                value = strain.taxonomy.long_name
            elif attribute in ("deposit.date", "collect.date", "isolation.date",
                              'catalog_inclusion_date'):
                value = rgetattr(strain, attribute)
                value = value.strfdate if value else None
            elif attribute == "growth.recommended_media":
                value = rgetattr(strain, attribute)
                if value is not None:
                    for gm in value:
                        gm = str(gm)
                        if gm not in growth_media_indexes:
                            # Bug fix: the original printed a debug line and
                            # `continue`d past an unreachable raise, silently
                            # accepting unknown media.
                            msg = f"Growth media {gm} not in the provided ones"
                            raise ValueError(msg)
                    value = "/".join(str(gm) for gm in value)
            elif attribute in ('growth.tested_temp_range',
                              "growth.recommended_temp"):
                value = rgetattr(strain, attribute)
                if value:
                    value = f'{value["min"]}; {value["max"]}'
            elif attribute == "form_of_supply":
                value = rgetattr(strain, attribute)
                value = ";".join(value)
            elif attribute == "collect.location.coords":
                lat = strain.collect.location.latitude
                long = strain.collect.location.longitude
                if lat is not None and long is not None:
                    value = f"{lat};{long}"
                else:
                    value = None
            elif attribute == "collect.location":
                location = strain.collect.location
                loc_index = _build_location_index(location)
                if loc_index is None:
                    continue
                if loc_index not in locations:
                    locations[loc_index] = location
                value = loc_index
            elif attribute in ("abs_related_files", "mta_files"):
                value = rgetattr(strain, attribute)
                value = ";".join(value) if value else None
            elif attribute == "taxonomy.organism_type":
                # NOTE: the original had a second, unreachable duplicate of
                # this branch further down; it was removed.
                value = rgetattr(strain, attribute)
                if value:
                    value = "; ".join([str(v.code) for v in value])
            elif attribute == "history":
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = " < ".join(value)
            elif attribute == "genetics.sexual_state":
                value = rgetattr(strain, attribute)
                if value:
                    sexual_states.add(value)
            elif attribute == "genetics.ploidy":
                value = rgetattr(strain, attribute)
            elif attribute == 'publications':
                value = []
                for pub in strain.publications:
                    value.append(pub.id)
                    # NOTE(review): publications without an id all share the
                    # None key here — confirm ids are always assigned.
                    if pub.id not in publications:
                        publications[pub.id] = pub
                value = ';'.join(str(v) for v in value) if value else None
            elif attribute == 'genetics.plasmids':
                value = rgetattr(strain, attribute)
                if value is not None:
                    value = ';'.join(value)
            else:
                value = rgetattr(strain, attribute)
            strain_row.append(value)
        genomic_markers[strain.id.strain_id] = strain.genetics.markers
        yield strain_row
def _build_location_index(location):
index = []
if location.country:
index.append(location.country)
if location.site:
index.append(location.site)
return ';'.join(index) if index else None
def write_markers_sheet(wb):
    """Create the 'Markers' sheet and fill it with the marker data.

    Column labels and attribute order come from MARKER_FIELDS; the rows
    come from MARKER_DATA.
    """
    markers_sheet = wb.create_sheet("Markers")
    labels = [field["label"] for field in MARKER_FIELDS]
    attributes = [field["attribute"] for field in MARKER_FIELDS]
    _write_work_sheet(markers_sheet, labels=labels, attributes=attributes,
                      data=MARKER_DATA)
    redimension_cell_width(markers_sheet)
def write_ontobiotopes(workbook, ontobiotype_path):
    """Copy the ontobiotope TSV file into an 'Ontobiotope' sheet.

    ontobiotype_path is a pathlib.Path to a tab-separated file; each TSV
    row becomes one worksheet row.
    """
    sheet = workbook.create_sheet("Ontobiotope")
    with ontobiotype_path.open() as tsv_fhand:
        for line in csv.reader(tsv_fhand, delimiter="\t"):
            sheet.append(line)
    redimension_cell_width(sheet)
def _write_work_sheet(sheet, labels, attributes, data):
    """Write a header row of *labels* followed by one row per item in *data*.

    Each row picks the values of *attributes* (in order) from the data item;
    column widths are then adjusted to fit the content.
    """
    sheet.append(labels)
    for item in data:
        sheet.append([item[attribute] for attribute in attributes])
    redimension_cell_width(sheet)
def write_growth_media(wb, growth_media):
    """Create the growth-media sheet with one row per growth medium."""
    sheet = wb.create_sheet(GROWTH_MEDIA)
    sheet.append(["Acronym", "Description", "Full description"])
    for medium in growth_media:
        sheet.append([medium.acronym,
                      medium.description,
                      medium.full_description])
    redimension_cell_width(sheet)
def redimension_cell_width(ws):
    """Resize every column of worksheet *ws* to fit its widest cell.

    Scans all populated cells, records the longest string representation
    per column letter, and sets that length as the column width.
    """
    max_widths = {}
    for row in ws.rows:
        for cell in row:
            # Compare against None (not truthiness) so falsy values such as
            # 0 or False still contribute to the column width; previously a
            # column containing only zeros kept the default width.
            if cell.value is not None:
                width = len(str(cell.value))
                if width > max_widths.get(cell.column_letter, 0):
                    max_widths[cell.column_letter] = width
    for column_letter, width in max_widths.items():
        ws.column_dimensions[column_letter].width = width

296
mirri/settings.py Normal file
View File

@ -0,0 +1,296 @@
from pathlib import Path
# Directory with data files bundled alongside the package.
DATA_DIR = Path(__file__).parent / "data"
# Canonical field names of the MIRRI strain specification; these are the
# keys used when parsing/serializing strain records.
ACCESSION_NUMBER = "accession_number"
RESTRICTION_ON_USE = "restriction_on_use"
NAGOYA_PROTOCOL = "nagoya_protocol"
ABS_RELATED_FILES = "abs_related_files"
# NOTE(review): value is singular ("mta_file") while the constant name is
# plural -- presumably matches the spec's column key; confirm.
MTA_FILES = "mta_file"
OTHER_CULTURE_NUMBERS = "other_culture_collection_numbers"
STRAIN_FROM_REGISTERED_COLLECTION = "strain_from_a_registered_collection"
RISK_GROUP = "risk_group"
DUAL_USE = "dual_use"
QUARANTINE = "quarantine"
ORGANISM_TYPE = "organism_type"
TAXON_NAME = "taxon_name"
INFRASUBSPECIFIC_NAME = "infrasubspecific_names"
COMMENTS_ON_TAXONOMY = "comments_on_taxonomy"
STATUS = "status"
HISTORY_OF_DEPOSIT = "history_of_deposit"
DEPOSITOR = "depositor"
DATE_OF_DEPOSIT = "date_of_deposit"
COLLECTED_BY = "collected_by"
DATE_OF_COLLECTION = "date_of_collection"
ISOLATED_BY = "isolated_by"
DATE_OF_ISOLATION = "date_of_isolation"
DATE_OF_INCLUSION = "date_of_inclusion_on_catalog"
TESTED_TEMPERATURE_GROWTH_RANGE = "tested_temperature_growth_range"
RECOMMENDED_GROWTH_TEMP = "recommended_growth_temperature"
RECOMMENDED_GROWTH_MEDIUM = "recommended_media_for_growth"
FORM_OF_SUPPLY = "form_of_supply"
GEO_COORDS = "coordinates_of_geographic_origin"
ACCESSION_NAME = "other_denomination"
ALTITUDE = "altitude_of_geographic_origin"
GEOGRAPHIC_ORIGIN = "geographic_origin"
GMO = "gmo"
GMO_CONSTRUCTION_INFO = "gmo_construction_information"
MUTANT_INFORMATION = "mutant_information"
GENOTYPE = "genotype"
LITERATURE = "literature"
SEXUAL_STATE = "sexual_state"
PLOIDY = "ploidy"
INTERSPECIFIC_HYBRID = "interspecific_hybrid"
HYBRIDS = 'hybrids'
PLANT_PATHOGENICITY_CODE = "plant_pathogenicity_code"
PATHOGENICITY = "pathogenicity"
ENZYME_PRODUCTION = "enzyme_production"
PRODUCTION_OF_METABOLITES = "production_of_metabolites"
APPLICATIONS = "applications"
REMARKS = "remarks"
PLASMIDS = "plasmids"
PLASMIDS_COLLECTION_FIELDS = "plasmids_collections_fields"
SUBSTRATE_HOST_OF_ISOLATION = "substrate_host_of_isolation"
ISOLATION_HABITAT = "isolation_habitat"
ONTOBIOTOPE_ISOLATION_HABITAT = "ontobiotope_term_for_the_isolation_habitat"
LITERATURE_LINKED_TO_SEQ_GENOME = "literature_linked_to_the_sequence_genome"
# StrainId
STRAIN_ID = "id"
COLLECTION_CODE = "collection_code"
STRAIN_PUI = "strain_pui"
STRAIN_URL = "strain_url"
ID_SYNONYMS = 'id_synonyms'
# Taxonomy
GENUS = "genus"
SPECIES = "species"
# Location
# Keys of a collecting-site record. The camelCase values presumably mirror
# an external schema -- do not "normalize" them.
COUNTRY = "countryOfOriginCode"
SITE = "site"
STATE = "state"
PROVINCE = "province"
MUNICIPALITY = "municipality"
ISLAND = "island"
OTHER = "other"
LATITUDE = "latitude"
LONGITUDE = "longitude"
# NOTE(review): ALTITUDE is rebound here -- it was defined above as
# "altitude_of_geographic_origin"; after import only "altitude" survives.
# Confirm no consumer relies on the first value.
ALTITUDE = "altitude"
GEOREF_METHOD = "georeferencingMethod"
COORDUNCERTAINTY = "coordUncertainty"
# NOTE(review): "coordenates" looks like a typo, but it is a serialized key;
# renaming could break existing data -- confirm before changing.
COORD_SPATIAL_REFERENCE = "coordenatesSpatialReference"
LOCATION = "location"
# Keys accepted in a collecting-site (geographic origin) mapping.
ALLOWED_COLLECTING_SITE_KEYS = [
    COUNTRY,
    STATE,
    PROVINCE,
    ISLAND,
    MUNICIPALITY,
    OTHER,
    SITE,
    LATITUDE,
    LONGITUDE,
    ALTITUDE,
    GEOREF_METHOD,
    COORDUNCERTAINTY,
    COORD_SPATIAL_REFERENCE,
]
# Column layout of the "Strains" sheet: the header label of each column and
# the (dotted-path) strain attribute that fills it, in spec order.
MIRRI_FIELDS = [
    {"attribute": "id", "label": "Accession number"},
    {"attribute": "restriction_on_use", "label": "Restrictions on use"},
    {"attribute": "nagoya_protocol",
     "label": "Nagoya protocol restrictions and compliance conditions"},
    {"attribute": ABS_RELATED_FILES, "label": "ABS related files"},
    {"attribute": "mta_files", "label": "MTA file"},
    {"attribute": "other_numbers", "label": "Other culture collection numbers"},
    {"attribute": "is_from_registered_collection",
     "label": "Strain from a registered collection"},
    {"attribute": "risk_group", "label": "Risk Group"},
    {"attribute": "is_potentially_harmful", "label": "Dual use"},
    {"attribute": "is_subject_to_quarantine", "label": "Quarantine in Europe"},
    {"attribute": "taxonomy.organism_type", "label": "Organism type"},
    {"attribute": "taxonomy.taxon_name", "label": "Taxon name"},
    {"attribute": "taxonomy.infrasubspecific_name",
     "label": "Infrasubspecific names"},
    {"attribute": "taxonomy.comments", "label": "Comment on taxonomy"},
    {"attribute": "taxonomy.interspecific_hybrid",
     "label": "Interspecific hybrid"},
    {"attribute": "status", "label": "Status"},
    {"attribute": "history", "label": "History of deposit", },
    {"attribute": "deposit.who", "label": "Depositor"},
    {"attribute": "deposit.date", "label": "Date of deposit"},
    {"attribute": "catalog_inclusion_date",
     "label": "Date of inclusion in the catalogue"},
    {"attribute": "collect.who", "label": "Collected by"},
    {"attribute": "collect.date", "label": "Date of collection"},
    {"attribute": "isolation.who", "label": "Isolated by"},
    {"attribute": "isolation.date", "label": "Date of isolation"},
    {"attribute": "isolation.substrate_host_of_isolation",
     "label": "Substrate/host of isolation"},
    {"attribute": "growth.tested_temp_range",
     "label": "Tested temperature growth range"},
    {"attribute": "growth.recommended_temp",
     "label": "Recommended growth temperature"},
    {"attribute": "growth.recommended_media",
     "label": "Recommended medium for growth"},
    {"attribute": "form_of_supply", "label": "Form of supply"},
    {"attribute": "other_denominations", "label": "Other denomination"},
    {"attribute": "collect.location.coords",
     "label": "Coordinates of geographic origin"},
    {"attribute": "collect.location.altitude",
     "label": "Altitude of geographic origin"},
    {"attribute": "collect.location", "label": "Geographic origin"},
    {"attribute": "collect.habitat", "label": "Isolation habitat"},
    {"attribute": "collect.habitat_ontobiotope",
     "label": "Ontobiotope term for the isolation habitat"},
    {"attribute": "genetics.gmo", "label": "GMO"},
    {"attribute": "genetics.gmo_construction",
     "label": "GMO construction information"},
    {"attribute": "genetics.mutant_info", "label": "Mutant information"},
    {"attribute": "genetics.genotype", "label": "Genotype"},
    {"attribute": "genetics.sexual_state", "label": "Sexual state"},
    {"attribute": "genetics.ploidy", "label": "Ploidy"},
    {"attribute": "genetics.plasmids", "label": "Plasmids"},
    {"attribute": "genetics.plasmids_in_collections",
     "label": "Plasmids collections fields"},
    {"attribute": "publications", "label": "Literature"},
    {"attribute": PLANT_PATHOGENICITY_CODE, "label": "Plant pathogenicity code"},
    {"attribute": "pathogenicity", "label": "Pathogenicity"},
    {"attribute": "enzyme_production", "label": "Enzyme production"},
    {"attribute": "production_of_metabolites",
     "label": "Production of metabolites"},
    {"attribute": "applications", "label": "Applications", },
    {"attribute": "remarks", "label": "Remarks"},
    {"attribute": LITERATURE_LINKED_TO_SEQ_GENOME,
     "label": "Literature linked to the sequence/genome"},
]
# Subtaxon ranks accepted below species level.
ALLOWED_SUBTAXA = ["subspecies", "variety", "convarietas", "group", "forma",
                   'forma.specialis']
ALLOWED_TAXONOMIC_RANKS = ["family", "genus", "species"] + ALLOWED_SUBTAXA
# nagoya
# Allowed values of the Nagoya-protocol field (spaces encoded as "_").
NAGOYA_NO_RESTRICTIONS = "no_known_restrictions_under_the_Nagoya_protocol"
NAGOYA_DOCS_AVAILABLE = "documents_providing_proof_of_legal_access_and_terms_of_use_available_at_the_collection"
NAGOYA_PROBABLY_SCOPE = "strain_probably_in_scope,_please_contact_the_culture_collection"
ALLOWED_NAGOYA_OPTIONS = [NAGOYA_NO_RESTRICTIONS,
                          NAGOYA_DOCS_AVAILABLE, NAGOYA_PROBABLY_SCOPE]
# Use restriction
NO_RESTRICTION = "no_restriction"
ONLY_RESEARCH = "only_research"
COMMERCIAL_USE_WITH_AGREEMENT = "commercial_use_with_agreement"
ALLOWED_RESTRICTION_USE_OPTIONS = [
    NO_RESTRICTION,
    ONLY_RESEARCH,
    COMMERCIAL_USE_WITH_AGREEMENT,
]
# Risk groups are stored as strings, not ints.
ALLOWED_RISK_GROUPS = ["1", "2", "3", "4"]
# Forms of supply of a strain.
AGAR = "Agar"
CRYO = "Cryo"
DRY_ICE = "Dry Ice"
LIQUID_CULTURE_MEDIUM = "Liquid Culture Medium"
LYO = "Lyo"
OIL = "Oil"
WATER = "Water"
ALLOWED_FORMS_OF_SUPPLY = [AGAR, CRYO, DRY_ICE,
                           LIQUID_CULTURE_MEDIUM, LYO, OIL, WATER]
# Names of the nested strain sub-records.
DEPOSIT = "deposit"
ISOLATION = "isolation"
COLLECT = "collect"
GROWTH = "growth"
GENETICS = "genetics"
TAXONOMY = "taxonomy"
# Markers
# NOTE(review): MARKERS is redefined near the bottom of this module with the
# sheet name "Markers"; this lowercase value is shadowed after import --
# confirm which one consumers expect.
MARKERS = "markers"
MARKER_TYPE = "marker_type"
MARKER_INSDC = "INSDC"
MARKER_SEQ = "marker_seq"
# Genomic marker types accepted in the "Markers" sheet.
ALLOWED_MARKER_TYPES = [
    {"acronym": "16S rRNA", "marker": "16S rRNA"},
    {"acronym": "ACT", "marker": "Actin"},
    {"acronym": "CaM", "marker": "Calmodulin"},
    {"acronym": "EF-1α", "marker": "elongation factor 1-alpha (EF-1α)"},
    {"acronym": "ITS",
     "marker": "nuclear ribosomal Internal Transcribed Spacer (ITS)"},
    {"acronym": "LSU", "marker": "nuclear ribosomal Large SubUnit (LSU)"},
    {"acronym": "RPB1", "marker": "Ribosomal RNA-coding genes RPB1"},
    {"acronym": "RPB2", "marker": "Ribosomal RNA-coding genes RPB2"},
    {"acronym": "TUBB", "marker": "β-Tubulin"},
]
# Field names of a publication record.
PUBLICATIONS = "publications"
PUB_ID = "id"
PUB_DOI = "pub_doi"
# NOTE(review): empty string looks like an unfinished placeholder -- probably
# intended to be something like "pubmed_id"; confirm before relying on it.
PUB_PUBMED_ID = ''
PUB_FULL_REFERENCE = "full_reference"
PUB_TITLE = "title"
PUB_AUTHORS = "authors"
PUB_JOURNAL = "journal"
PUB_YEAR = "year"
PUB_VOLUME = "volume"
PUB_ISSUE = "issue"
PUB_FIRST_PAGE = "first_page"
PUB_LAST_PAGE = "last_page"
BOOK_TITLE = "book_title"
BOOK_EDITOR = "book_editor"
BOOK_PUBLISHER = "book_publisher"
# Column layout of the "Literature" sheet: header label and the publication
# attribute that provides each column's value.
PUBLICATION_FIELDS = [
    {"label": "ID", "attribute": PUB_ID},
    {"label": "Full reference", "attribute": PUB_FULL_REFERENCE},
    {"label": "Authors", "attribute": PUB_AUTHORS},
    {"label": "Title", "attribute": PUB_TITLE},
    {"label": "Journal", "attribute": PUB_JOURNAL},
    {"label": "Year", "attribute": PUB_YEAR},
    {"label": "Volume", "attribute": PUB_VOLUME},
    {"label": "Issue", "attribute": PUB_ISSUE},
    {"label": "First page", "attribute": PUB_FIRST_PAGE},
    # Fixed: this row previously reused PUB_FIRST_PAGE, so the "Last page"
    # column was silently filled with first-page values.
    {"label": "Last page", "attribute": PUB_LAST_PAGE},
    {"label": "Book title", "attribute": BOOK_TITLE},
    {"label": "Editors", "attribute": BOOK_EDITOR},
    {"label": "Publisher", "attribute": BOOK_PUBLISHER},
]
# ploidy
# Numeric ploidy codes from the MIRRI specification.
ANEUPLOID = 0
HAPLOID = 1
DIPLOID = 2
TRIPLOID = 3
TETRAPLOID = 4
POLYPLOID = 9
ALLOWED_PLOIDIES = [ANEUPLOID, HAPLOID, DIPLOID, TRIPLOID, TETRAPLOID,
                    POLYPLOID]
# Abbreviations used inside taxon names mapped to the subtaxon rank they
# denote (see ALLOWED_SUBTAXA).
SUBTAXAS = {
    "subsp.": "subspecies",
    "var.": "variety",
    "convar.": "convarietas",
    "group.": "group",
    "f.": "forma",
    "f.sp.": "forma.specialis"
}
# Excel sheet name
LOCATIONS = "Geographic origin"  # 'Locations'
GROWTH_MEDIA = "Growth media"
GENOMIC_INFO = "Genomic information"
STRAINS = "Strains"
LITERATURE_SHEET = "Literature"
SEXUAL_STATE_SHEET = "Sexual states"
RESOURCE_TYPES_VALUES = "Resource types values"
FORM_OF_SUPPLY_SHEET = "Forms of supply"
PLOIDY_SHEET = "Ploidy"
ONTOBIOTOPE = "Ontobiotope"
# NOTE(review): this rebinds MARKERS (defined earlier in this module as
# "markers") to the sheet name -- after import only "Markers" is visible;
# confirm the rebinding is intended.
MARKERS = "Markers"

48
mirri/utils.py Normal file
View File

@ -0,0 +1,48 @@
import pycountry
class FakeCountry:
    """Stand-in for a pycountry record, for places pycountry does not know.

    Exposes the same ``name``/``code3`` attributes callers read from real
    country records.
    """

    def __init__(self, name=None, code3=None):
        self.name = name
        self.code3 = code3
def get_pycountry(value):
    """Resolve *value* (country name or alpha-3 code) to a country record.

    Name lookup is tried first, then the alpha-3 code lookup; returns None
    when neither matches.
    """
    if value == 'INW':
        # International waters have no ISO entry; return a fake record.
        return FakeCountry(name='International Water', code3='INW')
    by_name = get_country_from_name(value)
    if by_name is not None:
        return by_name
    return get_country_from_alpha3(value)
def get_country_from_name(name):
    """Look *name* up in pycountry's current and historic country lists.

    Tries, in order: exact name, common name, official name -- first in the
    current database, then in the historic one. Returns None when unknown.
    """
    try:
        # The first lookup lives inside the try block too: depending on the
        # installed pycountry version, .get() may raise KeyError for unknown
        # names instead of returning None; previously that error escaped.
        country = pycountry.countries.get(name=name)
        if country is None:
            country = pycountry.countries.get(common_name=name)
        if country is None:
            country = pycountry.countries.get(official_name=name)
        if country is None:
            country = pycountry.historic_countries.get(name=name)
        if country is None:
            country = pycountry.historic_countries.get(common_name=name)
        if country is None:
            country = pycountry.historic_countries.get(official_name=name)
    except (AttributeError, KeyError):
        country = None
    return country
def get_country_from_alpha3(code):
    """Look up a country by ISO 3166-1 alpha-3 *code*.

    Searches the current countries database first, then the historic one.
    Returns None when the code is unknown.
    """
    try:
        # First lookup moved inside the try block: some pycountry versions
        # raise KeyError for unknown codes instead of returning None, and
        # that error previously escaped this function.
        country = pycountry.countries.get(alpha_3=code)
        if country is None:
            country = pycountry.historic_countries.get(alpha_3=code)
    except (AttributeError, KeyError):
        country = None
    return country

View File

View File

@ -0,0 +1,50 @@
from mirri import rgetattr
def validate_strain(strain, version='20200601'):
    """Validate *strain* against the requested specification version.

    Only the '20200601' specification is supported; any other version
    raises NotImplementedError.
    """
    if version != '20200601':
        raise NotImplementedError('Only v20200601 is implemented')
    return _validate_strain_v20200601(strain)
def _validate_strain_v20200601(strain):
    """Validate a strain against spec version 20200601.

    Checks that every mandatory attribute is set and that the strain is
    Nagoya-protocol compliant. Returns a list of human readable error
    strings; empty when the strain is valid.
    """
    mandatory_attrs = [
        {'label': 'Accession Number', 'attr': 'id.strain_id'},
        {'label': 'Nagoya protocol', 'attr': 'nagoya_protocol'},
        {'label': 'Restriction on use', 'attr': 'restriction_on_use'},
        {'label': 'Risk group', 'attr': 'risk_group'},
        {'label': 'Organism type', 'attr': 'taxonomy.organism_type'},
        {'label': 'Taxon name', 'attr': 'taxonomy.long_name'},
        {'label': 'Recommended temperature to growth', 'attr': 'growth.recommended_temp'},
        {'label': 'Recommended media', 'attr': 'growth.recommended_media'},
        {'label': 'Form of supply', 'attr': 'form_of_supply'},
        {'label': 'Country', 'attr': 'collect.location.country'},
    ]
    errors = [f"{mandatory['label']} is mandatory field"
              for mandatory in mandatory_attrs
              if rgetattr(strain, mandatory['attr']) is None]
    if not is_valid_nagoya(strain):
        # Fixed typo in the user-facing message (was "wih").
        errors.append('Not compliant with nagoya protocol requirements')
    return errors
def is_valid_nagoya(strain):
    """Return False when the strain breaches the Nagoya-protocol check.

    Strains dated 2014 or later (the Nagoya protocol entered into force in
    October 2014) must declare a country of origin; anything earlier or
    undated passes.
    """
    # nagoya_requirements
    # Pick the first available date, in decreasing order of preference:
    # collection, isolation, deposit, catalogue inclusion.
    _date = strain.collect.date
    if _date is None:
        _date = strain.isolation.date
    if _date is None:
        _date = strain.deposit.date
    if _date is None:
        _date = strain.catalog_inclusion_date
    # NOTE(review): `_year` is a private attribute of the project's date
    # type -- presumably it exposes the year even for partial dates; confirm
    # it is the intended accessor (vs a public `.year`).
    year = None if _date is None else _date._year
    if year is not None and year >= 2014 and strain.collect.location.country is None:
        return False
    return True

View File

@ -0,0 +1,3 @@
from .error import Entity, Error
from .error_message import ErrorMessage
from .error_log import ErrorLog

View File

@ -0,0 +1,119 @@
from typing import Optional
from .error_message import ErrorMessage
class Entity():
    """One entity (sheet) of the MIRRI excel template.

    Args:
        acronym: three-character uppercase code identifying the entity
            (e.g. 'GMD' resolves to 'Growth Media').
    """

    def __init__(self, acronym: str) -> None:
        self.acronym = acronym

    def __str__(self) -> str:
        return f"Entity {self.acronym}: {self.name}"

    @property
    def _acronyms(self) -> list:
        # By convention every public, all-uppercase method of this class
        # resolves one acronym to its human readable name.
        acronyms = []
        for attr in dir(self):
            if not attr.isupper() or attr.startswith("__"):
                continue
            if callable(getattr(self, attr)):
                acronyms.append(attr)
        return acronyms

    @property
    def _names(self) -> dict:
        # Map every known acronym to the name its resolver method returns.
        return {acronym: getattr(self, acronym)() for acronym in self._acronyms}

    @property
    def name(self) -> str:
        try:
            return self._names[self.acronym]
        except KeyError:
            raise KeyError(f'Unknown acronym {self.acronym}.')

    @property
    def acronym(self) -> str:
        return self._acronym

    @acronym.setter
    def acronym(self, acronym: str) -> None:
        self._acronym = acronym

    def EFS(self) -> str:
        return 'Excel File Structure'

    def GMD(self) -> str:
        return 'Growth Media'

    def GOD(self) -> str:
        return 'Geographic Origin'

    def LID(self) -> str:
        return 'Literature'

    def STD(self) -> str:
        return 'Strains'

    def GID(self) -> str:
        return 'Genomic Information'

    def OTD(self) -> str:
        return 'Ontobiotope'

    def UCT(self) -> str:
        return 'Uncategorized'
class Error():
    """A single validation error.

    Fixed docstring: it previously documented nonexistent ``message`` and
    ``entity`` arguments instead of the actual constructor parameters.

    Args:
        code (str): error code (e.g. 'STD07'); stored upper-cased. Its first
            three characters identify the entity the error belongs to.
        pk (str, optional): primary key of the record (e.g. accession
            number) that triggered the error. Defaults to None.
        data (str, optional): offending value, interpolated into the error
            message and usable for sorting. Defaults to None.
    """
    def __init__(self, code: str, pk: Optional[str] = None, data: Optional[str] = None) -> None:
        # The code setter already upper-cases; no need to do it twice here.
        self.code = code
        self.pk = pk
        self.data = data

    def __str__(self):
        return f"Error {self._code}: {self.message}"

    @property
    def code(self) -> str:
        return self._code

    @code.setter
    def code(self, code: str) -> None:
        self._code = code.upper()

    @property
    def pk(self) -> Optional[str]:
        return self._pk

    @pk.setter
    def pk(self, pk: Optional[str] = None) -> None:
        self._pk = pk

    @property
    def data(self) -> Optional[str]:
        return self._data

    @data.setter
    def data(self, data: Optional[str]):
        self._data = data

    @property
    def entity(self) -> 'Entity':
        # The entity acronym is encoded in the first three characters of the
        # error code.
        return Entity(self.code[:3])

    @property
    def message(self) -> str:
        # Delegate the human readable text to the ErrorMessage catalogue.
        return ErrorMessage(self.code, self.pk, self.data).message

View File

@ -0,0 +1,77 @@
from typing import Optional, Union
from datetime import datetime
from .error import Error
class ErrorLog():
    def __init__(self, input_filename: str, cc: Optional[str] = None, date: Optional[Union[str, datetime]] = None, limit: int = 100):
        """
        Logger for Error instances.
        Args:
            input_filename (str): name of the file to be logged
            cc (str, optional): name of the curator. Defaults to None.
            date (str | datetime, optional): date (e.g. created, last modified) associated with the file, either a 'dd-mm-YYYY' string or a datetime. Useful for versioning. Defaults to None.
            limit (int, optional): limit of errors to print to the report. Defaults to 100.
        """
        self._input_filename = input_filename
        self._cc = cc
        # Fixed: assign through the property setter so that a 'dd-mm-YYYY'
        # string passed to the constructor is parsed into a datetime
        # (previously the raw string was stored, bypassing the setter).
        self.date = date
        # Errors grouped by entity acronym: {acronym: [Error, ...]}.
        self._errors = {}
        self.limit = limit
        # NOTE(review): _counter is never updated in this class -- presumably
        # meant to count logged errors; confirm it is still needed.
        self._counter = 0

    def __str__(self) -> str:
        output = f"""Error Log for file {self._input_filename}\nENTITY | CODE | MESSAGE"""
        for acronym, error_list in self.get_errors().items():
            for error in error_list:
                # Messages are truncated to 100 chars to keep rows readable.
                output += f"\n{acronym:6} | {error.code:6} | {error.message[:100]}"
        return output

    @property
    def input_filename(self) -> str:
        return self._input_filename

    @input_filename.setter
    def input_filename(self, input_filename: str) -> None:
        self._input_filename = input_filename

    @property
    def cc(self) -> Optional[str]:
        return self._cc

    @cc.setter
    def cc(self, cc: Optional[str]) -> None:
        self._cc = cc

    @property
    def date(self) -> Optional[Union[str, datetime]]:
        return self._date

    @date.setter
    def date(self, date: Optional[Union[str, datetime]] = None) -> None:
        # Strings are expected in 'dd-mm-YYYY' format; datetimes (and None)
        # are stored as-is.
        if isinstance(date, str):
            self._date = datetime.strptime(date, r'%d-%m-%Y')
        else:
            self._date = date

    def get_errors(self) -> dict:
        """
        Get all errors
        Returns:
            dict: Error instances grouped by entity acronym.
        """
        return self._errors

    def add_error(self, error: 'Error') -> None:
        """
        Add an error.
        Args:
            error (Error): Error instance.
        """
        # Group errors by the acronym of the entity they belong to.
        self._errors.setdefault(error.entity.acronym, []).append(error)

View File

@ -0,0 +1,408 @@
from typing import Optional
class ErrorMessage():
"""Error message
Args:
code (str): Error code.
pk (str | optional): The instance's primary key that triggered the error. Defaults to None.
value (str | optional): The instance's value that triggered the error. Defaults to None.
"""
def __init__(self, code: str, pk: Optional[str] = None, value: Optional[str] = None):
self.code = code.upper()
self.pk = pk
self.value = value
@property
def _codes(self) -> list:
return [
func
for func in dir(self)
if func.isupper() and
callable(getattr(self, func)) and
not func.startswith("__")
]
@property
def _messages(self) -> dict:
return {code: getattr(self, code) for code in self._codes}
@property
def message(self) -> str:
if not self._validate_code():
raise ValueError(f"{self.code} not found")
return self._messages[self.code]()
@property
def code(self) -> str:
return self._code
@code.setter
def code(self, code: str) -> None:
self._code = code.upper()
def _validate_code(self) -> bool:
return self.code in self._codes
@property
def pk(self) -> str:
return self._pk
@pk.setter
def pk(self, pk: str) -> None:
self._pk = pk
@property
def value(self) -> str:
return self._value
@value.setter
def value(self, value: str) -> None:
self._value = value
"""
Excel File Structure Error Codes
"""
def EXL00(self):
return f"The provided file '{self.pk}' is not an excel(xlsx) file"
def EFS01(self):
return "The 'Growth media' sheet is missing. Please check the provided excel template."
def EFS02(self):
return "The 'Geographic origin' sheet is missing. Please check the provided excel template."
def EFS03(self):
return "The 'Literature' sheet is missing. Please check the provided excel template."
def EFS04(self):
return "The 'Sexual state' sheet is missing. Please check the provided excel template."
def EFS05(self):
return "The 'Strains' sheet is missing. Please check the provided excel template."
def EFS06(self):
return "The 'Ontobiotope' sheet is missing. Please check the provided excel template."
def EFS07(self):
return "The 'Markers' sheet is missing. Please check the provided excel template."
def EFS08(self):
return "The 'Genomic information' sheet is missing. Please check the provided excel template."
"""
Growth Media Error Codes
"""
def GMD01(self):
return "The 'Acronym' column is a mandatory field in the Growth Media sheet."
def GMD02(self):
return "The 'Acronym' column is empty or has missing values."
def GMD03(self):
return "The 'Description' column is a mandatory field in the Growth Media sheet. The column can not be empty."
def GMD04(self):
return f"The 'Description' for growth media with Acronym {self.pk} is missing."
"""
Geographic Origin Error Codes
"""
def GOD01(self):
return "The 'ID' column is a mandatory field in the Geographic Origin sheet."
def GOD02(self):
return "The 'ID' column is empty or has missing values."
def GOD03(self):
return "The 'Country' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."
def GOD04(self):
return f"The 'Country' for geographic origin with ID {self.pk} is missing."
def GOD05(self):
return f"The 'Country' for geographic origin with ID {self.pk} is incorrect."
def GOD06(self):
return f"The 'Locality' column is a mandatory field in the Geographic Origin sheet. The column can not be empty."
def GOD07(self):
return f"The 'Locality' for geographic origin with ID {self.pk} is missing."
"""
Literature Error Codes
"""
def LID01(self):
return "The 'ID' column is a mandatory field in the Literature sheet."
def LID02(self):
return "The 'ID' column empty or missing values."
def LID03(self):
return "The 'Full reference' column is a mandatory field in the Literature sheet. The column can not be empty."
def LID04(self):
return f"The 'Full reference' for literature with ID {self.pk} is missing."
def LID05(self):
return "The 'Authors' column is a mandatory field in the Literature sheet. The column can not be empty."
def LID06(self):
return f"The 'Authors' for literature with ID {self.pk} is missing."
def LID07(self):
return "The 'Title' column is a mandatory field in the Literature sheet. The column can not be empty."
def LID08(self):
return f"The 'Title' for literature with ID {self.pk} is missing."
def LID09(self):
return "The 'Journal' column is a mandatory field in the Literature sheet. The column can not be empty."
def LID10(self):
return f"The 'Journal' for literature with ID {self.pk} is missing."
def LID11(self):
return "The 'Year' column is a mandatory field in the Literature sheet. The column can not be empty."
def LID12(self,):
return f"The 'Year' for literature with ID {self.pk} is missing."
def LID13(self):
return "The 'Volume' column is a mandatory field in the Literature sheet. The column can not be empty."
def LID14(self):
return f"The 'Volume' for literature with ID {self.pk} is missing."
def LID15(self):
return "The 'First page' column is a mandatory field. The column can not be empty."
def LID16(self):
return f"The 'First page' for literature with ID {self.pk} is missing."
def LID17(self):
msg = 'If journal; Title, Authors, journal, year and first page are required'
msg += 'If Book; Book Title, Authors, Year, Editors, Publishers'
return msg
"""
Strains Error Codes
"""
def STD01(self):
return "The 'Accession number' column is a mandatory field in the Strains sheet."
def STD02(self):
return "The 'Accession number' column is empty or has missing values."
def STD03(self):
return f"The 'Accesion number' must be unique. The '{self.value}' is repeated."
def STD04(self):
return (f"The 'Accession number' {self.pk} is not according to the specification."
" The value must be of the format '<Sequence of characters> <sequence of characters>'.")
def STD05(self):
return f"The 'Restriction on use' column is a mandatory field in the Strains Sheet. The column can not be empty."
def STD06(self):
return f"The 'Restriction on use' for strain with Accession Number {self.pk} is missing."
def STD07(self):
return (f"The 'Restriction on use' for strain with Accession Number {self.pk} is not according to the specification."
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
def STD08(self):
return f"The 'Nagoya protocol restrictions and compliance conditions' column is a mandatory field in the Strains Sheet. The column can not be empty."
def STD09(self):
return f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is missing."
def STD10(self):
return (f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is not according to the specification."
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
def STD11(self):
return (f"The 'Strain from a registered collection' for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
def STD12(self):
return "The 'Risk group' column is a mandatory field in the Strains Sheet. The column can not be empty."
def STD13(self):
return f"The 'Risk group' for strain with Accession Number {self.pk} is missing."
def STD14(self):
return (f"The 'Risk group' for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 1, 2, 3, 4.")
def STD15(self):
return (f"The 'Dual use' for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 1, 2.")
def STD16(self):
return (f"The “Quarantine in europe” for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 1, 2.")
def STD17(self):
return f"The 'Organism type' column is a mandatory field in the Strains Sheet. The column can not be empty."
def STD18(self):
return f"The 'Organism type' for strain with Accession Number {self.pk} is missing."
def STD19(self):
return (f"The 'Organism type' for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 'Algae', 'Archaea', 'Bacteria', 'Cyanobacteria', "
"'Filamentous Fungi', 'Phage', 'Plasmid', 'Virus', 'Yeast', 1, 2, 3, 4, 5, 6, 7, 8, 9.")
def STD20(self):
return f"The 'Taxon name' column is a mandatory field in the Strains Sheet. The column can not be empty."
def STD21(self):
return f"The 'Taxon name' for strain with Accession Number {self.pk} is missing."
def STD22(self):
return f"The 'Taxon name' for strain with Accession Number {self.pk} is incorrect."
def STD23(self):
return (f"The 'Interspecific hybrid' for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 1, 2.")
def STD24(self):
return f"The 'History of deposit' for strain with Accession Number {self.pk} is incorrect."
def STD25(self):
return (f"The 'Date of deposit' for strain with Accession Number {self.pk} is incorrect."
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
def STD26(self):
return (f"The 'Date of inclusion in the catalogue' for strain with Accession Number {self.pk} is incorrect."
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
def STD27(self):
return (f"The 'Date of collection' for strain with Accession Number {self.pk} is incorrect."
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
def STD28(self):
return (f"The 'Date of isolation' for strain with Accession Number {self.pk} is incorrect."
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
def STD29(self):
return (f"The 'Tested temperature growth range' for strain with Accession Number {self.pk} is incorrect."
" It must have two decimal numbers separated by ','")
def STD30(self):
return f"The 'Recommended growth temperature' column is a mandatory field in the Strains Sheet. The column can not be empty."
def STD31(self):
return f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is missing."
def STD32(self):
return (f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is incorrect."
" It must have two decimal numbers separated by ','.")
def STD33(self):
return f"The 'Recommended medium for growth' column is a mandatory field in the Strains Sheet. The column can not be empty."
def STD34(self):
return f"The 'Recommended medium for growth' for strain with Accession Number {self.pk} is missing."
def STD35(self):
return f"The value of 'Recommended medium for growth' for strain with Accession Number {self.pk} is not in the Growth Media Sheet."
def STD36(self):
return f"The 'Forms of supply' column is a mandatory field in the Strains Sheet. The column can not be empty."
def STD37(self):
return f"The 'Forms of supply' for strain with Accession Number {self.pk} is missing."
def STD38(self):
return f"The value of 'Forms of supply' for strain with Accession Number {self.pk} is not in the Forms of Supply Sheet."
def STD39(self):
return (f"The 'Coordinates of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
"The allowed formats are two or three decimal numbers separated by ','. Moreover, the first number must be"
"between [-90, 90], the second between [-180, 180], and the third, if provided, can assume any value.")
def STD40(self):
return (f"The 'Altitude of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
"The allowed formats are one decimal number between [-200, 8000].")
def STD41(self):
return f"The value of 'Ontobiotope term for the isolation habitat' for strain with Accession Number {self.pk} is not in the Ontobiotope Sheet."
def STD42(self):
return (f"The 'GMO' for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 1, 2")
def STD43(self):
return (f"The 'Sexual State' for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 'Mata', 'Matalpha', 'Mata/Matalpha', "
"'Matb', 'Mata/Matb', 'MTLa', 'MTLalpha', 'MTLa/MTLalpha', 'MAT1-1', 'MAT1-2', 'MAT1', 'MAT2', 'MT+', 'MT-'")
def STD44(self):
return (f"The 'Ploidy' for strain with Accession Number {self.pk} is not according to specification."
f" Your value is {self.value} and the accepted values are 0, 1, 2, 3, 4, 9")
def STD45(self):
msg = f"At least one of the values '{self.value}' of the literature field for strain {self.pk} are not in the literature sheet. "
msg += "If the those values are Pubmed ids or DOIs, please ignore this messsage"
return msg
"""
Genomic Information Error Codes
"""
def GID01(self):
return f"The 'Strain Acession Number' (Strain AN) column is a mandatory field in the Genomic Information Sheet."
def GID02(self):
return f"The 'Strain Acession Number' (Strain AN) column is empty or has missing values."
def GID03(self):
return f"The value of 'Strain Acession Number' (Strain AN) {self.value} is not in the Strains sheet."
def GID04(self):
return f"The 'Marker' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."
def GID05(self):
return f"The 'Marker' for genomic information with Strain AN {self.pk} is missing."
def GID06(self):
    """Error message: 'Marker' value not among the accepted markers."""
    strain_an = self.pk
    return f"The 'Marker' for genomic information with Strain AN {strain_an} is incorrect."
def GID07(self):
    """Error message: mandatory 'INSDC AN' column missing from the Genomic Information sheet."""
    # fix: dropped the f-prefix (the literal has no placeholders)
    return "The 'INSDC AN' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."
def GID08(self):
    """Error message: 'INSDC AN' value missing for a genomic-information row."""
    strain_an = self.pk
    return f"The 'INSDC AN' for genomic information with Strain AN {strain_an} is missing."
def GID09(self):
    """Error message: malformed 'INSDC AN' value for a genomic-information row."""
    strain_an = self.pk
    return f"The 'INSDC AN' for genomic information with Strain AN {strain_an} is incorrect."
def GID10(self):
    """Error message: malformed 'Sequence' value for a genomic-information row."""
    # fix: "characteres" typo in the user-facing text
    return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect."
            " It must be a sequence of 'G', 'T', 'A', 'C' characters of any length and without white spaces.")
"""
Ontobiotope Error Codes
"""
def OTD01(self):
    """Error message: mandatory 'ID' column missing from the Ontobiotope sheet."""
    # fix: "columns is" -> "column is"
    return "The 'ID' column is a mandatory field in the Ontobiotope Sheet."
def OTD02(self):
    """Error message: Ontobiotope 'ID' column empty or with missing values."""
    # fix: "columns is" -> "column is"
    return "The 'ID' column is empty or has missing values."
def OTD03(self):
    """Error message: mandatory 'Name' column missing from the Ontobiotope sheet."""
    # fix: "columns is" -> "column is"
    return "The 'Name' column is a mandatory field in the Ontobiotope Sheet. The column can not be empty."
def OTD04(self):
    """Error message: 'Name' value missing for an Ontobiotope row."""
    term_id = self.pk
    return f"The 'Name' for ontobiotope with ID {term_id} is missing."

View File

@ -0,0 +1,483 @@
import re
from pathlib import Path
from io import BytesIO
from zipfile import BadZipfile
from datetime import datetime
from calendar import monthrange
from openpyxl import load_workbook
from mirri.io.parsers.excel import workbook_sheet_reader, get_all_cell_data_from_sheet
from mirri.validation.error_logging import ErrorLog, Error
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
ERROR_CODE, FIELD, MANDATORY, MATCH,
MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON,
TYPE, UNIQUE, VALIDATION, VALUES, BIBLIO)
from mirri.settings import LOCATIONS, SUBTAXAS
from mirri.validation.validation_conf_20200601 import MIRRI_20200601_VALLIDATION_CONF
def validate_mirri_excel(fhand, version="20200601"):
    """Validate a MIRRI excel file-like object and return an ErrorLog.

    Only the 20200601 specification is currently supported; any other
    *version* raises NotImplementedError.
    """
    if version == "20200601":
        configuration = MIRRI_20200601_VALLIDATION_CONF
    else:
        # fix: message read "version20200601" (missing space)
        raise NotImplementedError("Only version 20200601 is implemented")
    return validate_excel(fhand, configuration)
def validate_excel(fhand, configuration):
    """Validate an excel file-like object against *configuration*; return an ErrorLog.

    Validation runs in two phases: structural (workbook opens, mandatory
    sheets/columns present) and, only if the structure is sound, content
    (per-cell and per-row checks).
    """
    validation_conf = configuration['sheet_schema']
    cross_ref_conf = configuration['cross_ref_conf']
    in_memory_sheet_conf = configuration['keep_sheets_in_memory']
    excel_name = Path(fhand.name).stem
    error_log = ErrorLog(excel_name)
    try:
        # read_only/data_only: stream cached cell values, not formulas
        workbook = load_workbook(filename=BytesIO(
            fhand.read()), read_only=True, data_only=True)
    except (BadZipfile, IOError):
        # not a readable xlsx file at all
        error = Error('EXL00', fhand.name, fhand.name)
        error_log.add_error(error)
        return error_log
    # excel structure errors
    structure_errors = list(validate_excel_structure(workbook, validation_conf))
    if structure_errors:
        for error in structure_errors:
            error = Error(error[ERROR_CODE], pk=error['id'],
                          data=error['value'])
            error_log.add_error(error)
        # broken structure: content validation would be meaningless
        return error_log
    crossrefs = get_all_crossrefs(workbook, cross_ref_conf)
    in_memory_sheets = get_all_in_memory_sheet(workbook, in_memory_sheet_conf)
    content_errors = validate_content(workbook, validation_conf,
                                      crossrefs, in_memory_sheets)
    for error in content_errors:
        # if error[ERROR_CODE] == 'STD43':
        #     continue
        error = Error(error[ERROR_CODE], pk=error['id'], data=error['value'])
        error_log.add_error(error)
    return error_log
def validate_excel_structure(workbook, validation_conf):
    """Yield one error dict per missing mandatory sheet or missing mandatory column."""
    for sheet_name, sheet_conf in validation_conf.items():
        # a sheet is mandatory when its sheet-level validation TYPE is MANDATORY
        mandatory = sheet_conf.get(VALIDATION, {}).get(TYPE, None)
        mandatory = mandatory == MANDATORY
        error_code = sheet_conf.get(VALIDATION, {}).get(ERROR_CODE, False)
        try:
            sheet = workbook[sheet_name]
        except KeyError:
            sheet = None
        if sheet is None:
            if mandatory:
                yield {'id': None, 'sheet': sheet_name, 'field': None,
                       'error_code': error_code, 'value': None}
            # optional sheet absent: nothing else to check for it
            continue
        headers = _get_sheet_headers(sheet)
        for column in sheet_conf.get(COLUMNS):
            field = column[FIELD]
            for step in column.get(VALIDATION, []):
                # only MANDATORY steps are structural (column must exist)
                if step[TYPE] == MANDATORY and field not in headers:
                    yield {'id': None, 'sheet': sheet_name, 'field': field,
                           'error_code': step[ERROR_CODE], 'value': None}
def _get_sheet_headers(sheet):
first_row = next(sheet.iter_rows(min_row=1, max_row=1))
return [c.value for c in first_row]
def _get_values_from_columns(workbook, sheet_name, columns):
    """Collect, as dict keys, the stringified values of *columns* across every row of *sheet_name*."""
    indexed_values = {}
    for sheet_row in workbook_sheet_reader(workbook, sheet_name):
        indexed_values.update(
            (str(sheet_row.get(column)), "") for column in columns)
    return indexed_values
def get_all_crossrefs(workbook, cross_refs_names):
    """Build the lookup tables used by the CROSSREF validation steps.

    *cross_refs_names* maps a sheet name to the columns whose values form the
    valid set; an empty column list means 'use every cell of the sheet'.
    A missing sheet produces an empty table (which disables the check).
    """
    crossrefs = {}
    for ref_name, columns in cross_refs_names.items():
        if columns:
            crossrefs[ref_name] = _get_values_from_columns(workbook, ref_name,
                                                           columns)
        else:
            try:
                crossrefs[ref_name] = get_all_cell_data_from_sheet(workbook, ref_name)
            except ValueError as error:
                # tolerate absent optional sheets; re-raise anything else
                if 'sheet is missing' in str(error):
                    crossrefs[ref_name] = []
                else:
                    raise
    return crossrefs
def get_all_in_memory_sheet(workbook, in_memory_sheet_conf):
    """Load each configured sheet fully into memory, indexed by its configured key column."""
    in_memory_sheets = {}
    for conf in in_memory_sheet_conf:
        name = conf['sheet_name']
        key_column = conf['indexed_by']
        in_memory_sheets[name] = {
            sheet_row[key_column]: sheet_row
            for sheet_row in workbook_sheet_reader(workbook, name)}
    return in_memory_sheets
def validate_content(workbook, validation_conf, crossrefs, in_memory_sheets):
    """Yield one error dict per failing cell and per failing row-level check.

    Row-level validations (Nagoya, bibliography) only run for rows whose
    cells all passed their own validations.
    """
    for sheet_name in validation_conf.keys():
        sheet_conf = validation_conf[sheet_name]
        sheet_id_column = sheet_conf['id_field']
        # per-sheet accumulator shared by the UNIQUE validation steps
        shown_values = {}
        row_validation_steps = sheet_conf.get(ROW_VALIDATION, None)
        for row in workbook_sheet_reader(workbook, sheet_name):
            id_ = row.get(sheet_id_column, None)
            if id_ is None:
                # a row without an id is reported once and then skipped
                error_code = _get_missing_row_id_error(sheet_id_column,
                                                       sheet_conf)
                yield {'id': id_, 'sheet': sheet_name,
                       'field': sheet_id_column,
                       'error_code': error_code, 'value': None}
                continue
            do_have_cell_error = False
            for column in sheet_conf[COLUMNS]:
                label = column[FIELD]
                validation_steps = column.get(VALIDATION, None)
                value = row.get(label, None)
                if validation_steps:
                    error_code = validate_cell(value, validation_steps,
                                               crossrefs, shown_values, label)
                    if error_code is not None:
                        do_have_cell_error = True
                        yield {'id': id_, 'sheet': sheet_name, 'field': label,
                               'error_code': error_code, 'value': value}
            if not do_have_cell_error and row_validation_steps:
                error_code = validate_row(
                    row, row_validation_steps, in_memory_sheets)
                if error_code is not None:
                    yield {'id': id_, 'sheet': sheet_name, 'field': 'row',
                           'error_code': error_code, 'value': 'row'}
def _get_missing_row_id_error(sheet_id_column, sheet_conf):
    """Return the error code configured for a missing value of the sheet's id column.

    Returns None when the id column has no MISSING validation step configured
    (the previous implementation raised IndexError in that case).
    """
    for column in sheet_conf[COLUMNS]:
        if column[FIELD] == sheet_id_column:
            return next((step[ERROR_CODE] for step in column[VALIDATION]
                         if step[TYPE] == MISSING), None)
    return None
def validate_row(row, validation_steps, in_memory_sheets):
    """Run row-level validations; return the first failing step's error code, or None."""
    for validation_step in validation_steps:
        kind = validation_step[TYPE]
        error_code = validation_step[ERROR_CODE]
        if kind == NAGOYA:
            if not is_valid_nagoya(row, in_memory_sheets):
                return error_code
        elif kind == BIBLIO:
            if not is_valid_pub(row):
                return error_code
        else:
            # only NAGOYA and BIBLIO row validations are implemented
            msg = f'{kind} is not a recognized row validation type method'
            raise NotImplementedError(msg)
def validate_cell(value, validation_steps, crossrefs, shown_values, label):
    """Run every non-MANDATORY validation step on a cell value.

    Returns the first failing step's error code, or None when all pass.
    NOTE(review): the step configuration dicts are mutated here (the crossref
    tables, the shared shown_values accumulator and the column label are
    injected) so validate_value can reach them - shared mutable state.
    """
    for step_conf in validation_steps:
        # MANDATORY is a structural (column-presence) check, not a cell check
        if step_conf[TYPE] == MANDATORY:
            continue
        step_conf['crossrefs_pointer'] = crossrefs
        step_conf['shown_values'] = shown_values
        step_conf['label'] = label
        error_code = validate_value(value, step_conf)
        if error_code is not None:
            return error_code
def is_valid_pub(row):
    """Return True when a literature row carries enough data to identify the publication.

    A row is valid when it has a 'Full reference'; otherwise, journal articles
    (rows with a 'Title') need authors, journal, year, volume and first page,
    and book entries need authors, year, editors, publishers and book title.
    """
    if row.get('Full reference', None):
        return True
    title = row.get('Title', None)
    authors = row.get('Authors', None)
    journal = row.get('Journal', None)
    year = row.get('Year', None)
    # The Literature sheet schema declares 'Volume' and 'Publisher';
    # the misspelled 'Volumen'/'Publishers' keys are kept as fallbacks for
    # backwards compatibility with older files.
    volume = row.get('Volume', None) or row.get('Volumen', None)
    publishers = row.get('Publisher', None) or row.get('Publishers', None)
    first_page = row.get('First page', None)
    book_title = row.get('Book title', None)
    editors = row.get('Editors', None)
    is_journal = bool(title)
    if is_journal:
        # fix: the original tested 'not not year', a double negation that
        # rejected rows *with* a year instead of rows without one
        return bool(authors and journal and year and volume and first_page)
    return bool(authors and year and editors and publishers and book_title)
def is_valid_nagoya(row, in_memory_sheets):  # sourcery skip: return-identity
    """Nagoya-protocol sanity check: material dated 2014 or later must have a country.

    The country comes from the in-memory Geographic Origin sheet via the
    row's 'Geographic origin' index; the relevant year is the first available
    of collection, isolation, deposit or catalogue-inclusion dates.
    """
    location_index = row.get('Geographic origin', None)
    if location_index is None:
        country = None
    else:
        geo_origin = in_memory_sheets[LOCATIONS].get(location_index, {})
        country = geo_origin.get('Country', None)
    _date = row.get("Date of collection", None)
    if _date is None:
        _date = row.get("Date of isolation", None)
    if _date is None:
        _date = row.get("Date of deposit", None)
    if _date is None:
        _date = row.get("Date of inclusion in the catalogue", None)
    if _date is not None:
        # dates may be datetime objects or YYYY[...] strings/ints
        year = _date.year if isinstance(_date, datetime) else int(str(_date)[:4])
    else:
        year = None
    # the Nagoya protocol entered into force in 2014
    if year is not None and year >= 2014 and country is None:
        return False
    return True
def is_valid_regex(value, validation_conf):
    """Return True when *value* (or each of its parts, if multiple) fully matches the configured regexp."""
    if value is None:
        return True
    pattern = validation_conf[MATCH]
    if validation_conf.get(MULTIPLE, False):
        separator = validation_conf.get(SEPARATOR, None)
        parts = [part.strip() for part in str(value).split(separator)]
    else:
        parts = [str(value)]
    return all(re.fullmatch(pattern, part) for part in parts)
def is_valid_crossrefs(value, validation_conf):
    """Return True when *value* (or each of its parts) is present in the referenced cross-ref table.

    An empty cross-ref table disables the check, so everything is accepted.
    """
    crossref_name = validation_conf[CROSSREF_NAME]
    choices = validation_conf['crossrefs_pointer'][crossref_name]
    if value is None or not choices:
        return True
    value = str(value)
    # fix: removed an unreachable duplicate 'value is None' check
    if validation_conf.get(MULTIPLE, False):
        separator = validation_conf.get(SEPARATOR, None)
        values = [item.strip() for item in value.split(separator)]
    else:
        values = [value.strip()]
    return all(item in choices for item in values)
def is_valid_choices(value, validation_conf):
    """Return True when *value* (or each of its parts, if multiple) is one of the configured choices."""
    if value is None:
        return True
    allowed = validation_conf[VALUES]
    if validation_conf.get(MULTIPLE, False):
        separator = validation_conf.get(SEPARATOR, None)
        candidates = [part.strip() for part in str(value).split(separator)]
    else:
        candidates = [str(value).strip()]
    return all(candidate in allowed for candidate in candidates)
def is_valid_date(value, validation_conf):
    """Return True when *value* is a plausible (possibly partial) date.

    Accepts datetime objects, bare years (int) and YYYY[MM[DD]] strings with
    optional '-' or '/' separators. Years must fall in [1700, current year],
    months in [1, 12], and days must exist in the given month.
    """
    if value is None:
        return True
    if isinstance(value, datetime):
        year, month, day = value.year, value.month, value.day
    elif isinstance(value, int):
        year, month, day = value, None, None
    elif isinstance(value, str):
        value = value.replace('-', '').replace('/', '')
        month = None
        day = None
        try:
            year = int(value[:4])
            if len(value) >= 6:
                month = int(value[4:6])
            if len(value) >= 8:
                day = int(value[6:8])
        except (IndexError, TypeError, ValueError):
            return False
    else:
        return False
    if year < 1700 or year > datetime.now().year:
        return False
    if month is not None:
        if month < 1 or month > 12:  # fix: was '> 13', accepting month 13
            return False
        if day is not None and (day < 1 or day > monthrange(year, month)[1]):
            return False
    return True
def is_valid_coords(value, validation_conf=None):
    """Return True when *value* is 'lat; lon[; precision]' with lat in [-90, 90] and lon in [-180, 180]."""
    # sourcery skip: return-identity
    if value is None:
        return True
    try:
        items = [item.strip() for item in value.split(";")]
        latitude = float(items[0])
        longitude = float(items[1])
        if len(items) > 2:
            float(items[2])  # precision only needs to be numeric
    except (AttributeError, IndexError, ValueError):
        # fix: narrowed a bare except; covers non-strings, too few parts and
        # non-numeric parts
        return False
    return -90 <= latitude <= 90 and -180 <= longitude <= 180
def is_valid_missing(value, validation_conf=None):
    """A value passes the 'missing' check when it is present (not None)."""
    if value is None:
        return False
    return True
def is_valid_number(value, validation_conf):
    """Return True when *value* is a number within the optional 'min'/'max' bounds."""
    if value is None:
        return True
    try:
        value = float(value)
    except (TypeError, ValueError):  # merged two identical except clauses
        return False
    _max = validation_conf.get('max', None)
    _min = validation_conf.get('min', None)
    if _max is not None and value > _max:
        return False
    if _min is not None and value < _min:
        return False
    return True
def is_valid_taxon(value, validation_conf=None):
    """Return True when *value* (or each separated taxon, if multiple) is a well-formed taxon name."""
    if validation_conf is None:
        # fix: the declared default None crashed on the .get() calls below
        validation_conf = {}
    multiple = validation_conf.get(MULTIPLE, False)
    separator = validation_conf.get(SEPARATOR, ';')
    taxa = value.split(separator) if multiple else [value]
    return all(_is_valid_taxon(taxon.strip()) for taxon in taxa)
def _is_valid_taxon(value):
    """Return True when a single taxon name looks valid.

    Empty names are accepted; 'Genus sp.'-style placeholders are rejected;
    after genus and species, subtaxa arrive as alternating 'rank name' pairs
    and every rank keyword must be known (present in SUBTAXAS).
    """
    value = value.strip()
    if not value:
        return True
    items = re.split(r" +", value)
    if len(items) > 1:
        species = items[1]
        if species in ("sp", "spp", ".sp", "sp."):
            return False
    if len(items) > 2:
        for index in range(0, len(items[2:]), 2):
            rank = SUBTAXAS.get(items[index + 2], None)
            if rank is None:
                # fix: removed a leftover debug print(value)
                return False
    return True
def is_valid_unique(value, validation_conf):
    """Return True the first time *value* is seen for this column, False on repeats.

    Seen values accumulate in validation_conf['shown_values'] (a dict of
    per-column dicts used as sets), which is shared across calls for the
    whole file - that is how repeats are detected.
    """
    column_label = validation_conf['label']
    shown_values = validation_conf['shown_values']
    seen = shown_values.setdefault(column_label, {})
    if value in seen:
        return False
    seen[value] = None  # dict used as an insertion-ordered set
    return True
def is_valid_file(path):
    """Return True when *path* opens as an excel file without EXL-level errors."""
    try:
        with path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)
            # NOTE(review): assumes get_errors() supports membership tests for
            # the 'EXL' error category - confirm against ErrorLog's API
            if "EXL" in error_log.get_errors():
                return False
    except (OSError, AttributeError):  # fix: narrowed a bare except
        return False
    return True
# Dispatch table mapping a validation step TYPE tag to its predicate.
# Every predicate has the signature predicate(value, step_conf) -> bool.
VALIDATION_FUNCTIONS = {
    MISSING: is_valid_missing,
    REGEXP: is_valid_regex,
    CHOICES: is_valid_choices,
    CROSSREF: is_valid_crossrefs,
    DATE: is_valid_date,
    COORDINATES: is_valid_coords,
    NUMBER: is_valid_number,
    TAXON: is_valid_taxon,
    UNIQUE: is_valid_unique}
def validate_value(value, step_conf):
    """Run one validation step on *value*; return its error code on failure, None on success."""
    kind = step_conf[TYPE]
    try:
        is_value_valid = VALIDATION_FUNCTIONS[kind]
    except KeyError as exc:
        msg = f'This validation type {kind} is not implemented'
        # fix: chain the original KeyError for easier debugging
        raise NotImplementedError(msg) from exc
    if not is_value_valid(value, step_conf):
        return step_conf[ERROR_CODE]
    return None

24
mirri/validation/tags.py Normal file
View File

@ -0,0 +1,24 @@
MANDATORY = "mandatory"
REGEXP = "regexp"
CHOICES = "choices"
CROSSREF = 'crossref'
CROSSREF_NAME = 'crossref_name'
MISSING = "missing"
VALIDATION = 'validation'
ERROR_CODE = 'error_code'
FIELD = 'field'
MULTIPLE = 'multiple'
TYPE = 'type'
COLUMNS = 'columns'
SOURCE = "sources"
SEPARATOR = "separator"
MATCH = 'match'
VALUES = 'values'
DATE = 'date'
COORDINATES = 'coord'
NUMBER = 'number'
TAXON = 'taxon'
UNIQUE = 'unique'
ROW_VALIDATION = 'row_validation'
NAGOYA = 'nagoya'
BIBLIO = 'bibliography'

View File

@ -0,0 +1,548 @@
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
ERROR_CODE, FIELD, MANDATORY, MATCH,
MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
UNIQUE,
VALIDATION, VALUES, BIBLIO)
from mirri.settings import (GEOGRAPHIC_ORIGIN, ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET)
# MARKERS,
# SEXUAL_STATE_SHEET,
# RESOURCE_TYPES_VALUES,
# FORM_OF_SUPPLY_SHEET,
# PLOIDY_SHEET)
STRAIN_FIELDS = [
{
FIELD: "Accession number",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: 'STD01'},
{TYPE: UNIQUE, ERROR_CODE: 'STD03'},
{TYPE: MISSING, ERROR_CODE: "STD02"},
{TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD04"}
]
},
{
FIELD: "Restrictions on use",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "STD05"},
{TYPE: MISSING, ERROR_CODE: "STD06"},
{TYPE: CHOICES, VALUES: ["1", "2", "3"],
MULTIPLE: False, ERROR_CODE: "STD07"}
]
},
{
FIELD: "Nagoya protocol restrictions and compliance conditions",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "STD08"},
{TYPE: MISSING, ERROR_CODE: "STD09"},
{TYPE: CHOICES, VALUES: ["1", "2", "3"],
MULTIPLE: False, ERROR_CODE: "STD10"}
]
},
{
FIELD: "ABS related files",
VALIDATION: [],
},
{
FIELD: "MTA file",
VALIDATION: [],
},
{
FIELD: "Other culture collection numbers",
# VALIDATION: [
# {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD07",
# MULTIPLE: True, SEPARATOR: ";"}
# ]
},
{
FIELD: "Strain from a registered collection",
VALIDATION: [
{TYPE: CHOICES, VALUES: ["1", "2"],
ERROR_CODE: "STD11"}
]
},
{
FIELD: "Risk Group",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "STD12"},
{TYPE: MISSING, ERROR_CODE: "STD13"},
{TYPE: CHOICES, VALUES: ["1", "2", "3", "4"],
MULTIPLE: False, ERROR_CODE: "STD14"}
]
},
{
FIELD: "Dual use",
VALIDATION: [
{TYPE: CHOICES, VALUES: ["1", "2"],
ERROR_CODE: "STD15"}
]
},
{
FIELD: "Quarantine in Europe",
VALIDATION: [
{TYPE: CHOICES, VALUES: ["1", "2"],
ERROR_CODE: "STD16"}
]
},
{
FIELD: "Organism type",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "STD17"},
{TYPE: MISSING, ERROR_CODE: "STD18"},
{TYPE: CHOICES, VALUES: ["Algae", "Archaea", "Bacteria",
"Cyanobacteria", "Filamentous Fungi",
"Phage", "Plasmid", "Virus", "Yeast",
"1", "2", "3", "4", "5", "6", "7", "8", "9"],
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD19"}
]
},
{
FIELD: "Taxon name",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "STD20"},
{TYPE: MISSING, ERROR_CODE: "STD21"},
{TYPE: TAXON, ERROR_CODE: "STD22", MULTIPLE: True,
SEPARATOR: ';'}
]
},
{
FIELD: "Infrasubspecific names",
},
{
FIELD: "Comment on taxonomy",
},
{
FIELD: "Interspecific hybrid",
VALIDATION: [
{TYPE: CHOICES, VALUES: ["1", "2"],
ERROR_CODE: "STD23"}
]
},
{
FIELD: "Status",
},
{
FIELD: "History of deposit",
VALIDATION: [
# {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD24", # modify the regex
# MULTIPLE: True, SEPARATOR: ";"}
]
},
{
FIELD: "Depositor"
},
{
FIELD: "Date of deposit",
VALIDATION: [
{TYPE: DATE, ERROR_CODE: "STD25"},
]
},
{
FIELD: "Date of inclusion in the catalogue",
VALIDATION: [
{TYPE: DATE, ERROR_CODE: "STD26"},
]
},
{
FIELD: "Collected by",
},
{
FIELD: "Date of collection",
VALIDATION: [
{TYPE: DATE, ERROR_CODE: "STD27"},
]
},
{
FIELD: "Isolated by",
},
{
FIELD: "Date of isolation",
VALIDATION: [
{TYPE: DATE, ERROR_CODE: "STD28"},
]
},
{
FIELD: "Substrate/host of isolation",
},
{
FIELD: "Tested temperature growth range",
VALIDATION: [
{TYPE: REGEXP, "match": r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
ERROR_CODE: "STD29", MULTIPLE: True, SEPARATOR: ";"}
]
},
{
FIELD: "Recommended growth temperature",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "STD30"},
{TYPE: MISSING, ERROR_CODE: "STD31"},
{TYPE: REGEXP, "match": r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
ERROR_CODE: "STD32",
MULTIPLE: True, SEPARATOR: ";"}
]
},
{
FIELD: "Recommended medium for growth",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "STD33"},
{TYPE: MISSING, ERROR_CODE: "STD34"},
{TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"}
]
},
{
FIELD: "Form of supply",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "STD36"},
{TYPE: MISSING, ERROR_CODE: "STD37"},
{TYPE: CHOICES, VALUES: ['Agar', 'Cryo', 'Dry Ice', 'Liquid Culture Medium',
'Lyo', 'Oil', 'Water'],
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD38"}
]
},
{
FIELD: "Other denomination",
},
{
FIELD: "Coordinates of geographic origin",
VALIDATION: [
{TYPE: COORDINATES, ERROR_CODE: "STD39"},
]
},
{
FIELD: "Altitude of geographic origin",
VALIDATION: [
{TYPE: NUMBER, 'max': 8000, 'min': -200, ERROR_CODE: "STD40"},
]
},
{
# value can be in the cell or in another sheet. Don't configure this
FIELD: "Geographic origin",
},
{
FIELD: "Isolation habitat",
},
{
FIELD: "Ontobiotope term for the isolation habitat",
VALIDATION: [
{TYPE: CROSSREF, CROSSREF_NAME: "Ontobiotope",
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD41"}
]
},
{
FIELD: "GMO",
VALIDATION: [
{TYPE: CHOICES, VALUES: ["1", "2"],
ERROR_CODE: "STD42"}
]
},
{
FIELD: "GMO construction information",
},
{
FIELD: "Mutant information",
},
{
FIELD: "Genotype",
},
{
FIELD: "Sexual state",
VALIDATION: [
{TYPE: CROSSREF, CROSSREF_NAME: SEXUAL_STATE_SHEET,
ERROR_CODE: "STD43"}
# {TYPE: CHOICES, VALUES: ["Mata", "Matalpha", "Mata/Matalpha",
# "Matb", "Mata/Matb", "MTLa", "MTLalpha", "MTLa/MTLalpha",
# "MAT1-1", "MAT1-2", "MAT1", "MAT2", "MT+", "MT-"],
# ERROR_CODE: "STD43"}
]
},
{
FIELD: "Ploidy",
VALIDATION: [
{TYPE: CHOICES, VALUES: ["0", "1", "2", "3", "4", "9"],
ERROR_CODE: "STD44"}
]
},
{
FIELD: "Plasmids",
},
{
FIELD: "Plasmids collections fields",
},
{
# value can be in the cell or in another sheet. Don't configure this
FIELD: "Literature",
VALIDATION: [
{TYPE: CROSSREF, CROSSREF_NAME: LITERATURE_SHEET,
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD45"}
]
},
{
FIELD: "Plant pathogenicity code",
},
{
FIELD: "Pathogenicity",
},
{
FIELD: "Enzyme production",
},
{
FIELD: "Production of metabolites",
},
{
FIELD: "Applications",
},
{
FIELD: "Remarks"
},
{
FIELD: "Literature linked to the sequence/genome",
},
]
SHEETS_SCHEMA = {
LOCATIONS: {
"acronym": "GOD",
"id_field": "ID",
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS02"},
COLUMNS: [
{
FIELD: "ID",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "GOD01"},
{TYPE: MISSING, ERROR_CODE: "GOD02"},
]
},
{
FIELD: "Country",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "GOD03"},
{TYPE: MISSING, ERROR_CODE: "GOD04"}
]
},
{
FIELD: "Region",
VALIDATION: []
},
{
FIELD: "City",
VALIDATION: []
},
{
FIELD: "Locality",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "GOD06"},
{TYPE: MISSING, ERROR_CODE: "GOD07"}
]
}
],
},
GROWTH_MEDIA: {
"acronym": "GMD",
"id_field": "Acronym",
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS01"},
COLUMNS: [
{
FIELD: "Acronym",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "GMD01"},
{TYPE: MISSING, ERROR_CODE: "GMD02"}
]
},
{
FIELD: "Description",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "GMD03"},
{TYPE: MISSING, ERROR_CODE: "GMD04"}
]
},
{
FIELD: "Full description",
VALIDATION: []
},
],
},
GENOMIC_INFO: {
"acronym": "GID",
"id_field": "Strain AN",
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS08"},
COLUMNS: [
{
FIELD: "Strain AN",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "GID01"},
{TYPE: MISSING, ERROR_CODE: "GID02"},
{TYPE: CROSSREF, CROSSREF_NAME: "Strains",
ERROR_CODE: "GID03"},
]
},
{
FIELD: "Marker",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "GID04"},
{TYPE: MISSING, ERROR_CODE: "GID05"},
{TYPE: CHOICES, ERROR_CODE: "GID06",
VALUES: ['16S rRNA', 'ACT', 'CaM', 'EF-1α', 'ITS',
'LSU', 'RPB1', 'RPB2', 'TUBB']}
]
},
{
FIELD: "INSDC AN",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "GID07"},
{TYPE: MISSING, ERROR_CODE: "GID08"},
]
},
{
FIELD: "Sequence",
VALIDATION: []
},
],
},
STRAINS: {
"acronym": "STD",
'id_field': 'Accession number',
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
ROW_VALIDATION: [
{TYPE: NAGOYA, ERROR_CODE: "STRXXX"},
],
COLUMNS: STRAIN_FIELDS,
},
LITERATURE_SHEET: {
"acronym": "LID",
'id_field': 'ID',
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
ROW_VALIDATION: [
{TYPE: BIBLIO, ERROR_CODE: 'LID17'}
],
COLUMNS: [
{
FIELD: "ID",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "LID01"},
{TYPE: MISSING, ERROR_CODE: "LID02"},
]
},
{
FIELD: "Full reference",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "LID03"},
]
},
{
FIELD: "Authors",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "LID05"},
]
},
{
FIELD: "Title",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "LID07"},
]
},
{
FIELD: "Journal",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "LID09"},
]
},
{
FIELD: "Year",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "LID11"},
]
},
{
FIELD: "Volume",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "LID13"},
]
},
{
FIELD: "Issue",
VALIDATION: []
},
{
FIELD: "First page",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "LID15"},
{TYPE: MISSING, ERROR_CODE: "LID16"},
]
},
{
FIELD: "Last page",
VALIDATION: []
},
{
FIELD: "Book title",
VALIDATION: []
},
{
FIELD: "Editors",
VALIDATION: []
},
{
FIELD: "Publisher",
VALIDATION: []
}
],
},
# SEXUAL_STATE_SHEET: {"acronym": "SSD", COLUMNS: []},
# RESOURCE_TYPES_VALUES: {"acronym": "RTD", COLUMNS: []},
# FORM_OF_SUPPLY_SHEET: {"acronym": "FSD", COLUMNS: []},
# PLOIDY_SHEET: {"acronym": "PLD", COLUMNS: []},
ONTOBIOTOPE: {
"acronym": "OTD",
"id_field": "ID",
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS06"},
COLUMNS: [
{
FIELD: "ID",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "OTD01"},
{TYPE: MISSING, ERROR_CODE: "OTD02"},
]
},
{
FIELD: "Name",
VALIDATION: [
{TYPE: MANDATORY, ERROR_CODE: "OTD03"},
{TYPE: MISSING, ERROR_CODE: "OTD04"},
]
},
]
},
# MARKERS: {
# "acronym": "MKD",
# "id_field": "",
# COLUMNS: [
# {
# FIELD: "Acronym",
# VALIDATION: []
# },
# {
# FIELD: "Marker",
# VALIDATION: []
# },
# ],
# },
}
# Sheet name -> columns whose values are valid cross-reference targets.
# An empty list means 'accept any cell value found in the sheet'.
CROSS_REF_CONF = {
    ONTOBIOTOPE: ['ID', 'Name'],
    LITERATURE_SHEET: ['ID'],
    LOCATIONS: ['Locality'],
    GROWTH_MEDIA: ['Acronym'],
    STRAINS: ["Accession number"],
    SEXUAL_STATE_SHEET: []
}
# Top-level validation configuration for the 20200601 MIRRI specification.
MIRRI_20200601_VALLIDATION_CONF = {
    'sheet_schema': SHEETS_SCHEMA,
    'cross_ref_conf': CROSS_REF_CONF,
    'keep_sheets_in_memory': [
        {'sheet_name': LOCATIONS, 'indexed_by': 'Locality'}]
}
# Correctly-spelled alias. The original name (with the 'VALLIDATION' typo) is
# kept because other modules import it.
MIRRI_20200601_VALIDATION_CONF = MIRRI_20200601_VALLIDATION_CONF

5
requirements.txt Normal file
View File

@ -0,0 +1,5 @@
openpyxl
requests
requests_oauthlib
pycountry
deepdiff

35
setup.py Normal file
View File

@ -0,0 +1,35 @@
import setuptools
from pathlib import Path
from setuptools import find_packages

# Read the long description and the pinned requirements from disk.
with open("README.md", "r") as fh:
    long_description = fh.read()
with open('requirements.txt') as req_fhand:  # fix: the handle was leaked
    requirements = [line.strip() for line in req_fhand]

scripts = [str(f) for f in Path('./bin').glob('*.py')]

setuptools.setup(
    name="Mirri utils",
    version="0.1",  # fix: setuptools expects the version as a string
    author="P.Ziarsolo",
    author_email="pziarsolo@gmail.com",
    description="A small library to help dealing with MIRRI data",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/pziarsolo/mirri_utils",
    packages=find_packages(),
    package_data={"mirri": ['data/ontobiotopes.csv']},
    install_requires=requirements,
    scripts=scripts,
    license="GNU General Public License v3.0",
    classifiers=[
        "Programming Language :: Python :: 3",
        # fix: the classifier said MIT while license= declares GPLv3
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
)

0
tests/__init__.py Normal file
View File

View File

View File

@ -0,0 +1,22 @@
import unittest
from mirri.biolomics.remote.rest_client import BiolomicsClient
try:
from mirri.biolomics.secrets import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
except ImportError:
raise ImportError(
'You need a secrets.py in the project dir. with CLIENT_ID, SECRET_ID, USERNAME, PASSWORD')
from .utils import VERSION, SERVER_URL
class BiolomicsClientAuthTest(unittest.TestCase):
    """Integration test for BiolomicsClient OAuth authentication (needs network and credentials)."""

    def test_authentication(self):
        client = BiolomicsClient(SERVER_URL, VERSION, CLIENT_ID, SECRET_ID,
                                 USERNAME, PASSWORD)
        # two consecutive requests must reuse the same (cached) token
        access1 = client.get_access_token()
        access2 = client.get_access_token()
        assert access1 is not None
        self.assertEqual(access1, access2)

View File

@ -0,0 +1,62 @@
import unittest
from mirri.biolomics.remote.endoint_names import GROWTH_MEDIUM_WS
from mirri.biolomics.serializers.growth_media import GrowthMedium
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from tests.biolomics.utils import SERVER_URL, VERSION
# NOTE(review): this class tests growth media but is named ...SequenceClientTest,
# apparently copy-pasted; the name is kept so external test selection still works.
class BiolomicsSequenceClientTest(unittest.TestCase):
    """Integration tests for growth-medium records of BiolomicsMirriClient (needs network)."""

    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_media_by_id(self):
        record_id = 101
        growth_medium = self.client.retrieve_by_id('growth_medium', record_id)
        self.assertEqual(growth_medium.record_id, record_id)
        self.assertEqual(growth_medium.record_name, 'MA2PH6')

    # fix: this test was also named test_retrieve_media_by_id, which silently
    # shadowed the previous one so it never ran
    def test_retrieve_media_by_name(self):
        record_name = 'MA2PH6'
        record_id = 101
        growth_medium = self.client.retrieve_by_name('growth_medium', record_name)
        self.assertEqual(growth_medium.record_id, record_id)
        self.assertEqual(growth_medium.record_name, record_name)

    def test_create_growth_media(self):
        self.client.start_transaction()
        try:
            growth_medium = GrowthMedium()
            growth_medium.acronym = 'BBB'
            growth_medium.ingredients = 'alkhdflakhf'
            growth_medium.description = 'desc'
            new_growth_medium = self.client.create(GROWTH_MEDIUM_WS, growth_medium)
            # fix: replaced a debug print with a real assertion
            self.assertEqual(new_growth_medium.acronym, growth_medium.acronym)
            self.assertEqual(new_growth_medium.description, growth_medium.description)
        finally:
            self.client.rollback()

    def test_update_growth_media(self):
        self.client.start_transaction()
        try:
            growth_medium = GrowthMedium()
            growth_medium.acronym = 'BBB'
            growth_medium.ingredients = 'alkhdflakhf'
            growth_medium.description = 'desc'
            growth_medium.full_description = 'full'
            new_growth_medium = self.client.create(GROWTH_MEDIUM_WS, growth_medium)
            new_growth_medium.full_description = 'full2'
            # fix: the old 'updated_gm = new_growth_medium = ...' double
            # assignment made the next assertion compare an object with
            # itself; compare against the intended new value instead
            updated_gm = self.client.update(GROWTH_MEDIUM_WS, new_growth_medium)
            self.assertEqual(updated_gm.full_description, 'full2')
            retrieved = self.client.retrieve_by_id(GROWTH_MEDIUM_WS,
                                                   new_growth_medium.record_id)
            self.assertEqual(retrieved.full_description, updated_gm.full_description)
        finally:
            self.client.rollback()

View File

@ -0,0 +1,46 @@
import unittest
from .utils import VERSION, SERVER_URL
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient, BIBLIOGRAPHY_WS
from mirri.entities.publication import Publication
class BiolomicsLiteratureClientTest(unittest.TestCase):
    """Integration tests for bibliography records of BiolomicsMirriClient (needs network)."""

    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_biblio_by_id(self):
        record_id = 100
        record_name = "Miscellaneous notes on Mucoraceae"
        biblio = self.client.retrieve_by_id(BIBLIOGRAPHY_WS, record_id)
        self.assertEqual(biblio.record_id, record_id)
        self.assertEqual(biblio.record_name, record_name)

    # fix: was misnamed test_retrieve_media_by_id (copy-pasted from the
    # growth-media tests) although it retrieves a bibliography record by name
    def test_retrieve_biblio_by_name(self):
        record_id = 100
        record_name = "Miscellaneous notes on Mucoraceae"
        biblio = self.client.retrieve_by_name(BIBLIOGRAPHY_WS, record_name)
        self.assertEqual(biblio.record_id, record_id)
        self.assertEqual(biblio.record_name, record_name)
        self.assertEqual(biblio.year, 1994)
        self.assertEqual(biblio.volume, '50')

    def test_create_biblio(self):
        pub = Publication()
        pub.pubmed_id = 'PM18192'
        pub.journal = 'my_journal'
        pub.title = 'awesome title'
        pub.authors = 'pasdas, aposjdasd, alsalsfda'
        pub.volume = 'volume 0'
        record_id = None
        try:
            new_pub = self.client.create(BIBLIOGRAPHY_WS, pub)
            record_id = new_pub.record_id
            self.assertEqual(new_pub.title, pub.title)
            self.assertEqual(new_pub.volume, pub.volume)
        finally:
            # always remove the record created on the remote service
            if record_id is not None:
                self.client.delete_by_id(BIBLIOGRAPHY_WS, record_id)

View File

@ -0,0 +1,49 @@
import unittest
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.serializers.sequence import GenomicSequenceBiolomics
from .utils import VERSION, SERVER_URL
class BiolomicsSequenceClientTest(unittest.TestCase):
    """Integration tests for sequence records of BiolomicsMirriClient (needs network)."""

    def setUp(self) -> None:
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_seq_by_id(self):
        sequence = self.client.retrieve_by_id('sequence', 101)
        self.assertEqual(sequence.record_id, 101)
        self.assertEqual(sequence.record_name, 'MUM 02.54 - CaM')
        self.assertEqual(sequence.marker_type, 'CaM')

    def test_retrieve_seq_by_name(self):
        name = 'MUM 02.54 - CaM'
        sequence = self.client.retrieve_by_name('sequence', name)
        self.assertEqual(sequence.record_id, 101)
        self.assertEqual(sequence.record_name, name)
        self.assertEqual(sequence.marker_type, 'CaM')

    def test_create_delete_sequence(self):
        marker = GenomicSequenceBiolomics()
        marker.marker_id = 'GGAAUUA'
        marker.marker_seq = 'aattgacgat'
        marker.marker_type = 'CaM'
        marker.record_name = 'peioMarker'
        created = self.client.create('sequence', marker)
        # the created record must echo every attribute we sent
        for attr in ('marker_id', 'marker_seq', 'marker_type', 'record_name'):
            self.assertEqual(getattr(created, attr), getattr(marker, attr))
        self.assertTrue(created.record_id)
        # clean up the record created on the remote service
        self.client.delete_by_id('sequence', created.record_id)
if __name__ == "__main__":
# import sys;sys.argv = ['', 'BiolomicsClient.Test.test_get_strain_by_id']
unittest.main()

View File

@ -0,0 +1,727 @@
import unittest
import pycountry
import deepdiff
from pprint import pprint
from mirri.biolomics.serializers.sequence import (
GenomicSequenceBiolomics,
serialize_to_biolomics as sequence_to_biolomics,
serialize_from_biolomics as sequence_from_biolomics)
from mirri.biolomics.serializers.strain import (
serialize_to_biolomics as strain_to_biolomics,
serialize_from_biolomics as strain_from_biolomics)
from mirri.biolomics.serializers.growth_media import (
# serialize_to_biolomics as growth_medium_to_biolomics,
serialize_from_biolomics as growth_medium_from_biolomics)
from mirri.biolomics.serializers.bibliography import (
serializer_from_biolomics as literature_from_biolomics,
serializer_to_biolomics as literature_to_biolomics
)
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.entities.publication import Publication
from .utils import create_full_data_strain, VERSION, SERVER_URL
STRAIN_WS = {
'CreationDate': '2021-05-19T12:22:33',
'CreatorUserName': 'pziarsolo@cect.org',
'LastChangeDate': '2021-05-19T12:22:36',
'LastChangeUserName': 'pziarsolo@cect.org',
'RecordDetails': {'ABS related files': {'FieldType': 21,
'Value': [{'Name': 'link',
'Value': 'https://example.com'}]},
'Altitude of geographic origin': {'FieldType': 4,
'Value': 121.0},
'Applications': {'FieldType': 5, 'Value': 'health'},
'Catalog URL': {'FieldType': 21, 'Value': []},
'Collection accession number': {'FieldType': 5,
'Value': 'TESTCC 1'},
'Collection date': {'FieldType': 8, 'Value': '1991/01/01'},
'Collector': {'FieldType': 5, 'Value': 'the collector'},
'Comment on taxonomy': {'FieldType': 5,
'Value': 'lalalalla'},
'Coordinates of geographic origin': {'FieldType': 12,
'Value': {'Altitude': 0.0,
'Latitude': 23.3,
'Longitude': 23.3,
'Precision': 0.0}},
'Country': {'FieldType': 118,
'Value': [{'Name': {'FieldType': 5,
'Value': 'Spain'},
'RecordId': 54,
'TargetFieldValue': None}]},
'Data provided by': {'FieldType': 22, 'Value': 'Unknown'},
'Date of inclusion in the catalogue': {'FieldType': 8,
'Value': '1985/05/02'},
'Deposit date': {'FieldType': 8, 'Value': '1985/05/02'},
'Depositor': {'FieldType': 5,
'Value': 'NCTC, National Collection of Type '
'Cultures - NCTC, London, United '
'Kingdom of Great Britain and '
'Northern Ireland.'},
'Dual use': {'FieldType': 20, 'Value': 'yes'},
'Enzyme production': {'FieldType': 5,
'Value': 'some enzimes'},
'Form': {'FieldType': 3,
'Value': [{'Name': 'Agar', 'Value': 'yes'},
{'Name': 'Cryo', 'Value': 'no'},
{'Name': 'Dry Ice', 'Value': 'no'},
{'Name': 'Liquid Culture Medium',
'Value': 'no'},
{'Name': 'Lyo', 'Value': 'yes'},
{'Name': 'Oil', 'Value': 'no'},
{'Name': 'Water', 'Value': 'no'}]},
'GMO': {'FieldType': 22, 'Value': 'Yes'},
'GMO construction information': {'FieldType': 5,
'Value': 'instructrion to '
'build'},
'Genotype': {'FieldType': 5, 'Value': 'some genotupe'},
'Geographic origin': {'FieldType': 5,
'Value': 'una state; one '
'municipality; somewhere in '
'the world'},
'History': {'FieldType': 5,
'Value': 'newer < In the middle < older'},
'Infrasubspecific names': {'FieldType': 5,
'Value': 'serovar tete'},
'Interspecific hybrid': {'FieldType': 20, 'Value': 'no'},
'Isolation date': {'FieldType': 8, 'Value': '1900/01/01'},
'Isolation habitat': {'FieldType': 5,
'Value': 'some habitat'},
'Isolator': {'FieldType': 5, 'Value': 'the isolator'},
'Literature': {'FieldType': 118, 'Value': []},
'MTA files URL': {'FieldType': 21,
'Value': [{'Name': 'link',
'Value': 'https://example.com'}]},
'MTA text': {'FieldType': 5, 'Value': ''},
'Metabolites production': {'FieldType': 5,
'Value': 'big factory of cheese'},
'Mutant information': {'FieldType': 5, 'Value': 'x-men'},
'Nagoya protocol restrictions and compliance conditions': {'FieldType': 20,
'Value': 'no '
'known '
'restrictions '
'under '
'the '
'Nagoya '
'protocol'},
'Ontobiotope': {'FieldType': 118,
'Value': [{'Name': {'FieldType': 5,
'Value': 'anaerobic '
'bioreactor '
'(OBT:000190)'},
'RecordId': 100,
'TargetFieldValue': None}]},
'Ontobiotope term for the isolation habitat': {'FieldType': 5,
'Value': ''},
'Orders': {'FieldType': 118, 'Value': []},
'Organism type': {'FieldType': 3,
'Value': [{'Name': 'Algae', 'Value': 'no'},
{'Name': 'Archaea',
'Value': 'yes'},
{'Name': 'Bacteria',
'Value': 'no'},
{'Name': 'Cyanobacteria',
'Value': 'no'},
{'Name': 'Filamentous Fungi',
'Value': 'no'},
{'Name': 'Phage', 'Value': 'no'},
{'Name': 'Plasmid',
'Value': 'no'},
{'Name': 'Virus', 'Value': 'no'},
{'Name': 'Yeast', 'Value': 'no'},
{'Name': 'Microalgae',
'Value': '?'}]},
'Other culture collection numbers': {'FieldType': 5,
'Value': 'aaa a; aaa3 '
'a3'},
'Other denomination': {'FieldType': 5, 'Value': ''},
'Pathogenicity': {'FieldType': 5, 'Value': 'illness'},
'Plasmids': {'FieldType': 5, 'Value': 'asda'},
'Plasmids collections fields': {'FieldType': 5,
'Value': 'asdasda'},
'Ploidy': {'FieldType': 20, 'Value': 'Polyploid'},
'Quarantine in Europe': {'FieldType': 20, 'Value': 'no'},
'Recommended growth medium': {'FieldType': 118,
'Value': [{'Name': {'FieldType': 5,
'Value': 'AAA'},
'RecordId': 1,
'TargetFieldValue': None}]},
'Recommended growth temperature': {'FieldType': 19,
'MaxValue': 30.0,
'MinValue': 30.0},
'Remarks': {'FieldType': 5, 'Value': 'no remarks for me'},
'Restrictions on use': {'FieldType': 20,
'Value': 'no restriction apply'},
'Risk group': {'FieldType': 20, 'Value': '1'},
'Sequences 16s': {"Value": [
{
"Name": {
"Value": "X76436",
"FieldType": 5
},
"RecordId": 50992,
"TargetFieldValue": {
"Value": {
"Sequence": ""
},
"FieldType": 14
}
}
],
"FieldType": 114},
'Sequences 18S rRNA': {'FieldType': 114, 'Value': []},
'Sequences 23S rRNA': {'FieldType': 114, 'Value': []},
'Sequences ACT': {'FieldType': 114, 'Value': []},
'Sequences AmdS': {'FieldType': 114, 'Value': []},
'Sequences Amds12': {'FieldType': 114, 'Value': []},
'Sequences Beta tubulin': {'FieldType': 114, 'Value': []},
'Sequences COX1': {'FieldType': 114, 'Value': []},
'Sequences COX2': {'FieldType': 114, 'Value': []},
'Sequences CaM': {'FieldType': 114, 'Value': []},
'Sequences Cct8': {'FieldType': 114, 'Value': []},
'Sequences Cit1': {'FieldType': 114, 'Value': []},
'Sequences CypA': {'FieldType': 114, 'Value': []},
'Sequences GDP': {'FieldType': 114, 'Value': []},
'Sequences GPD': {'FieldType': 114, 'Value': []},
'Sequences Genome': {'FieldType': 114, 'Value': []},
'Sequences HIS': {'FieldType': 114, 'Value': []},
'Sequences HSP': {'FieldType': 114, 'Value': []},
'Sequences IDH': {'FieldType': 114, 'Value': []},
'Sequences IGS': {'FieldType': 114, 'Value': []},
'Sequences ITS': {'FieldType': 114, 'Value': []},
'Sequences LSU': {'FieldType': 114, 'Value': []},
'Sequences MAT': {'FieldType': 114, 'Value': []},
'Sequences MAT1': {'FieldType': 114, 'Value': []},
'Sequences Miscellaneous': {'FieldType': 114, 'Value': []},
'Sequences NorA': {'FieldType': 114, 'Value': []},
'Sequences NorB': {'FieldType': 114, 'Value': []},
'Sequences Omt12': {'FieldType': 114, 'Value': []},
'Sequences OmtA': {'FieldType': 114, 'Value': []},
'Sequences PcCYP': {'FieldType': 114, 'Value': []},
'Sequences PpgA': {'FieldType': 114, 'Value': []},
'Sequences PreA': {'FieldType': 114, 'Value': []},
'Sequences PreB': {'FieldType': 114, 'Value': []},
'Sequences RAPD': {'FieldType': 114, 'Value': []},
'Sequences RPB1': {'FieldType': 114, 'Value': []},
'Sequences RPB2': {'FieldType': 114, 'Value': []},
'Sequences SSU': {'FieldType': 114, 'Value': []},
'Sequences TEF1a': {'FieldType': 114, 'Value': []},
'Sequences TEF2': {'FieldType': 114, 'Value': []},
'Sequences TUB': {'FieldType': 114, 'Value': []},
'Sequences Tsr1': {'FieldType': 114, 'Value': []},
'Sequences c16S rRNA': {'FieldType': 114, 'Value': []},
'Sequences cbhI': {'FieldType': 114, 'Value': []},
'Sequences mcm7': {'FieldType': 114, 'Value': []},
'Sequences rbcL': {'FieldType': 114, 'Value': []},
'Sexual state': {'FieldType': 5, 'Value': 'MT+A'},
'Status': {'FieldType': 5,
'Value': 'type of Bacillus alcalophilus'},
'Strain from a registered collection': {'FieldType': 20,
'Value': 'no'},
'Substrate of isolation': {'FieldType': 5,
'Value': 'some substrate'},
'Taxon name': {'FieldType': 109,
'Value': [{'Name': {'FieldType': 5,
'Value': 'Escherichia '
'coli'},
'RecordId': 100004123,
'TargetFieldValue': {'DesktopInfo': None,
'DesktopInfoHtml': '<b>Current '
'name: '
'</b><i>Escherichia '
'coli</i> '
'(Migula '
'1895) '
'Castellani '
'and '
'Chalmers '
'1919',
'FieldType': 27,
'NewSynFieldInfo': None,
'ObligateSynonymId': 0,
'OriginalSynFieldInfo': None,
'SynInfo': {'BasionymRecord': {'NameInfo': '',
'RecordId': 100004123,
'RecordName': '<i>Escherichia '
'coli</i> '
'(Migula '
'1895) '
'Castellani '
'and '
'Chalmers '
'1919',
'SecondLevelRecords': None},
'CurrentNameRecord': {'NameInfo': '',
'RecordId': 100004123,
'RecordName': '<i>Escherichia '
'coli</i> '
'(Migula '
'1895) '
'Castellani '
'and '
'Chalmers '
'1919',
'SecondLevelRecords': None},
'ObligateSynonymRecords': [],
'SelectedRecord': {
'NameInfo': '<i>Escherichia '
'coli</i> '
'(Migula '
'1895) '
'Castellani '
'and '
'Chalmers '
'1919',
'RecordId': 100004123,
'RecordName': '<i>Escherichia '
'coli</i> '
'(Migula '
'1895) '
'Castellani '
'and '
'Chalmers '
'1919',
'SecondLevelRecords': None},
'TaxonSynonymsRecords': []},
'SynonymId': 100004123}}]},
'Tested temperature growth range': {'FieldType': 19,
'MaxValue': 32.0,
'MinValue': 29.0},
'Type description': {'FieldType': 5, 'Value': ''}},
'RecordId': 148038,
'RecordName': 'MIRRI 2240561'}
STRAIN_WS_EXPECTED_NO_REMOTE = {
'Acronym': 'MIRRI',
'RecordDetails': {'ABS related files': {'FieldType': 'U',
'Value': [{'Name': 'link',
'Value': 'https://example.com'}]},
'Altitude of geographic origin': {'FieldType': 'D',
'Value': 121},
'Applications': {'FieldType': 'E', 'Value': 'health'},
'Collection accession number': {'FieldType': 'E',
'Value': 'TESTCC 1'},
'Collection date': {'FieldType': 'H', 'Value': '1991-01-01'},
'Collector': {'FieldType': 'E', 'Value': 'the collector'},
'Comment on taxonomy': {'FieldType': 'E',
'Value': 'lalalalla'},
'Coordinates of geographic origin': {'FieldType': 'L',
'Value': {'Latitude': 23.3,
'Longitude': 23.3}},
'Date of inclusion in the catalogue': {'FieldType': 'H',
'Value': '1985-05-02'},
'Deposit date': {'FieldType': 'H', 'Value': '1985-05-02'},
'Depositor': {'FieldType': 'E',
'Value': 'NCTC, National Collection of Type '
'Cultures - NCTC, London, United '
'Kingdom of Great Britain and '
'Northern Ireland.'},
'Dual use': {'FieldType': 'T', 'Value': 'yes'},
'Enzyme production': {'FieldType': 'E',
'Value': 'some enzimes'},
'Form': {'FieldType': 'C',
'Value': [{'Name': 'Agar', 'Value': 'yes'},
{'Name': 'Cryo', 'Value': 'no'},
{'Name': 'Dry Ice', 'Value': 'no'},
{'Name': 'Liquid Culture Medium',
'Value': 'no'},
{'Name': 'Lyo', 'Value': 'yes'},
{'Name': 'Oil', 'Value': 'no'},
{'Name': 'Water', 'Value': 'no'}]},
'GMO': {'FieldType': 'V', 'Value': 'Yes'},
'GMO construction information': {'FieldType': 'E',
'Value': 'instructrion to '
'build'},
'Genotype': {'FieldType': 'E', 'Value': 'some genotupe'},
'Geographic origin': {'FieldType': 'E',
'Value': 'una state; one '
'municipality; somewhere in '
'the world'},
'History': {'FieldType': 'E',
'Value': 'firstplave < seconn place < third '
'place'},
'Infrasubspecific names': {'FieldType': 'E',
'Value': 'serovar tete'},
'Interspecific hybrid': {'FieldType': 'T', 'Value': 'no'},
'Isolation date': {'FieldType': 'H', 'Value': '1900-01-01'},
'Isolation habitat': {'FieldType': 'E',
'Value': 'some habitat'},
'Isolator': {'FieldType': 'E', 'Value': 'the isolator'},
'MTA files URL': {'FieldType': 'U',
'Value': [{'Name': 'link',
'Value': 'https://example.com'}]},
'Metabolites production': {'FieldType': 'E',
'Value': 'big factory of cheese'},
'Mutant information': {'FieldType': 'E', 'Value': 'x-men'},
'Nagoya protocol restrictions and compliance conditions': {'FieldType': 'T',
'Value': 'no '
'known '
'restrictions '
'under '
'the '
'Nagoya '
'protocol'},
'Ontobiotope': {'FieldType': 'RLink', 'Value': 'OBT:000190'},
'Organism type': {'FieldType': 'C',
'Value': [{'Name': 'Algae', 'Value': 'no'},
{'Name': 'Archaea',
'Value': 'yes'},
{'Name': 'Bacteria',
'Value': 'no'},
{'Name': 'Cyanobacteria',
'Value': 'no'},
{'Name': 'Filamentous Fungi',
'Value': 'no'},
{'Name': 'Phage', 'Value': 'no'},
{'Name': 'Plasmid',
'Value': 'no'},
{'Name': 'Virus', 'Value': 'no'},
{'Name': 'Yeast',
'Value': 'no'}]},
'Other culture collection numbers': {'FieldType': 'E',
'Value': 'aaa a; aaa3 '
'a3'},
'Pathogenicity': {'FieldType': 'E', 'Value': 'illness'},
'Plasmids': {'FieldType': 'E', 'Value': 'asda'},
'Plasmids collections fields': {'FieldType': 'E',
'Value': 'asdasda'},
'Ploidy': {'FieldType': 'T', 'Value': 'Polyploid'},
'Quarantine in Europe': {'FieldType': 'T', 'Value': 'no'},
'Recommended growth temperature': {'FieldType': 'S',
'MaxValue': 30.0,
'MinValue': 30.0},
'Remarks': {'FieldType': 'E', 'Value': 'no remarks for me'},
'Restrictions on use': {'FieldType': 'T',
'Value': 'no restriction apply'},
'Risk group': {'FieldType': 'T', 'Value': '1'},
'Sexual state': {'FieldType': 'E', 'Value': 'MT+A'},
'Status': {'FieldType': 'E',
'Value': 'type of Bacillus alcalophilus'},
'Strain from a registered collection': {'FieldType': 'T',
'Value': 'no'},
'Substrate of isolation': {'FieldType': 'E',
'Value': 'some substrate'},
'Taxon name': {'FieldType': 'SynLink',
'Value': 'Escherichia coli'},
'Tested temperature growth range': {'FieldType': 'S',
'MaxValue': 32.0,
'MinValue': 29.0}}}
class StrainSerializerTest(unittest.TestCase):
    """Tests for the strain <-> Biolomics web-service payload serializers."""

    def test_serialize_to_biolomics(self):
        # With client=None no remote lookups can be resolved, so the payload
        # must match the "no remote" fixture exactly.
        strain = create_full_data_strain()
        ws_strain = strain_to_biolomics(strain, client=None)
        self.assertDictEqual(ws_strain, STRAIN_WS_EXPECTED_NO_REMOTE)

    def test_serialize_to_biolomics_remote(self):
        # With a live client the serializer resolves linked records remotely
        # (ontobiotope, country, literature, sequence markers).
        client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                      SECRET_ID, USERNAME, PASSWORD)
        strain = create_full_data_strain()
        marker = GenomicSequenceBiolomics()
        marker.marker_id = "MUM 02.15 - Beta tubulin"
        marker.marker_type = 'TUBB'
        strain.genetics.markers = [marker]
        ws_strain = strain_to_biolomics(strain, client=client)
        self.assertEqual(strain.collect.habitat_ontobiotope,
                         ws_strain['RecordDetails']['Ontobiotope']['Value'][0]['Name']['Value'])
        # The alpha-3 country code is serialized as the full country name.
        self.assertEqual(pycountry.countries.get(alpha_3=strain.collect.location.country).name,
                         ws_strain['RecordDetails']['Country']['Value'][0]['Name']['Value'])
        self.assertEqual(strain.publications[0].title,
                         ws_strain['RecordDetails']['Literature']['Value'][0]['Name']['Value'])
        # The 'TUBB' marker type maps to the 'Sequences TUB' ws field.
        self.assertEqual(strain.genetics.markers[0].marker_id,
                         ws_strain['RecordDetails']['Sequences TUB']['Value'][0]['Name']['Value'])

    def test_serialize_from_biolomics(self):
        # Deserialize the captured STRAIN_WS payload and spot-check that each
        # section of the strain entity was populated from it.
        ws_strain = STRAIN_WS
        strain = strain_from_biolomics(ws_strain)
        self.assertEqual(strain.record_id, 148038)
        self.assertEqual(strain.record_name, 'MIRRI 2240561')
        self.assertEqual(strain.taxonomy.long_name, 'Escherichia coli')
        self.assertEqual(strain.growth.recommended_media, ['AAA'])
        self.assertEqual(strain.collect.location.altitude, 121)
        self.assertEqual(strain.collect.location.country, 'ESP')
        self.assertEqual(strain.applications, 'health')
        self.assertEqual(strain.id.strain_id, 'TESTCC 1')
        self.assertEqual(strain.collect.date.strfdate, '19910101')
        self.assertEqual(strain.taxonomy.comments, 'lalalalla')
        self.assertEqual(strain.catalog_inclusion_date.strfdate, '19850502')
        self.assertIn('NCTC, National Collection of Type ', strain.deposit.who)
        self.assertTrue(strain.is_potentially_harmful)
        # Only forms marked 'yes' in the ws payload survive deserialization.
        self.assertEqual(strain.form_of_supply, ['Agar', 'Lyo'])
        self.assertTrue(strain.genetics.gmo)
        self.assertEqual(strain.genetics.gmo_construction, 'instructrion to build')
        self.assertEqual(strain.genetics.genotype, 'some genotupe')
        # The '<'-separated history string is split into a list.
        self.assertEqual(strain.history, ['newer', 'In the middle', 'older'])
        self.assertEqual(strain.taxonomy.infrasubspecific_name, 'serovar tete')
        self.assertEqual(strain.isolation.who, 'the isolator')
        self.assertEqual(strain.isolation.date.strfdate, '19000101')
        self.assertEqual(strain.mta_files, ['https://example.com'])
        self.assertEqual(strain.genetics.mutant_info, 'x-men')
        # The OBT code is extracted from the 'name (OBT:...)' ws value.
        self.assertEqual(strain.collect.habitat_ontobiotope, 'OBT:000190')
        self.assertEqual(strain.taxonomy.organism_type[0].name, 'Archaea')
        self.assertEqual(strain.other_numbers[0].strain_id, 'aaa a')
        self.assertEqual(strain.other_numbers[1].strain_id, 'aaa3 a3')
        self.assertEqual(strain.pathogenicity, 'illness')
        self.assertEqual(strain.genetics.plasmids, ['asda'])
        # 'Polyploid' apparently maps to the numeric ploidy code 9 --
        # NOTE(review): confirm this code against the serializer/settings.
        self.assertEqual(strain.genetics.ploidy, 9)
        self.assertFalse(strain.is_subject_to_quarantine)
        self.assertEqual(strain.risk_group, '1')
        self.assertFalse(strain.is_from_registered_collection)
        self.assertEqual(strain.growth.tested_temp_range, {'min': 29, 'max': 32})
# Captured Biolomics web-service payload for one genomic-sequence record;
# fixture for SequenceSerializerTest.  The numeric FieldType codes come from
# the remote service (presumably 5 = text, 14 = sequence, 20 = enum,
# 21 = links, 118 = record links -- TODO confirm against the Biolomics docs).
BIOLOMICSSEQ = {
    'RecordDetails': {
        'Barcode level': {'FieldType': 20, 'Value': 'undefined'},
        'DNA extract number': {'FieldType': 5, 'Value': ''},
        'DNA sequence': {'FieldType': 14,
                         'Value': {'Sequence': 'caaaggaggccttctccctcttcgtaag'}},
        'Editing state': {'FieldType': 20, 'Value': 'Auto import'},
        'Forward primer(s)': {'FieldType': 5, 'Value': ''},
        'Genbank': {'FieldType': 21, 'Value': []},
        'INSDC number': {'FieldType': 5, 'Value': 'AATGAT'},
        'Literature': {'FieldType': 21, 'Value': []},
        'Literature1': {'FieldType': 118, 'Value': []},
        'Marker name': {'FieldType': 5, 'Value': 'CaM'},
        'Privacy': {'FieldType': 20, 'Value': 'undefined'},
        'Quality': {'FieldType': 5, 'Value': ''},
        'Remarks': {'FieldType': 5, 'Value': ''},
        'Reverse primer(s)': {'FieldType': 5, 'Value': ''},
        'Review state': {'FieldType': 5, 'Value': ''},
        'Strain number': {'FieldType': 5, 'Value': 'MUM 02.54'}},
    'RecordId': 101,
    'RecordName': 'MUM 02.54 - CaM'}
class SequenceSerializerTest(unittest.TestCase):
    """Tests for the genomic-sequence <-> Biolomics payload serializers."""

    def test_from_biolomics(self):
        """Each entity attribute mirrors the matching fixture field."""
        details = BIOLOMICSSEQ['RecordDetails']
        marker = sequence_from_biolomics(BIOLOMICSSEQ)
        self.assertEqual(marker.record_name, BIOLOMICSSEQ['RecordName'])
        self.assertEqual(marker.record_id, BIOLOMICSSEQ['RecordId'])
        self.assertEqual(marker.marker_type, details['Marker name']['Value'])
        self.assertEqual(marker.marker_id, details['INSDC number']['Value'])
        self.assertEqual(marker.marker_seq,
                         details['DNA sequence']['Value']['Sequence'])

    def test_to_biolomics(self):
        """Serialization emits exactly the three detail fields plus ids."""
        marker = GenomicSequenceBiolomics()
        marker.record_id = 111
        marker.record_name = 'peioMarker'
        marker.marker_type = 'CaM'
        marker.marker_id = 'GGAAUUA'
        marker.marker_seq = 'aattgacgat'
        expected = {
            'RecordId': marker.record_id,
            'RecordName': marker.record_name,
            'RecordDetails': {
                'INSDC number': {'Value': marker.marker_id, 'FieldType': 'E'},
                'DNA sequence': {'Value': {'Sequence': marker.marker_seq},
                                 'FieldType': 'N'},
                'Marker name': {'Value': marker.marker_type,
                                'FieldType': 'E'}}}
        self.assertEqual(sequence_to_biolomics(marker), expected)
# Captured Biolomics web-service payload for one growth-medium record;
# fixture for MediumSerializerTest.  FieldType 5 appears to be plain text
# and 21 a link list -- TODO confirm against the Biolomics documentation.
BIOLOMICS_MEDIUM = {
    "RecordId": 100,
    "RecordName": "MA20S",
    "RecordDetails": {
        "Full description": {
            "Value": "mout agar+20% saccharose",
            "FieldType": 5
        },
        "Ingredients": {
            "Value": "Malt extract\r\n\tDilute brewery malt with water to 10% sugar solution (level 10 on Brix saccharose meter), 15 minutes at 121 C\r\nsaccharose\t200g\r\ndistilled water\t0.6l\r\nagar\t15g\r\n",
            "FieldType": 5
        },
        "Link to full description": {
            "Value": [],
            "FieldType": 21
        },
        "Medium description": {
            "Value": "",
            "FieldType": 5
        },
        "Other name": {
            "Value": "",
            "FieldType": 5
        },
        "pH": {
            "Value": "7 with KOH",
            "FieldType": 5
        },
        "Remarks": {
            "Value": "",
            "FieldType": 5
        },
        "Reference": {
            "Value": "",
            "FieldType": 5
        },
        "Sterilization conditions": {
            "Value": "15 minutes at 121 C",
            "FieldType": 5
        }
    }
}
class MediumSerializerTest(unittest.TestCase):
    """Tests for the growth-medium deserializer."""

    def test_from_biolomics(self):
        """A deserialized medium mirrors the raw Biolomics record fields."""
        details = BIOLOMICS_MEDIUM['RecordDetails']
        medium = growth_medium_from_biolomics(BIOLOMICS_MEDIUM)
        self.assertEqual(medium.record_id, BIOLOMICS_MEDIUM['RecordId'])
        self.assertEqual(medium.record_name, BIOLOMICS_MEDIUM['RecordName'])
        self.assertEqual(medium.ingredients, details['Ingredients']['Value'])
        self.assertEqual(medium.full_description,
                         details['Full description']['Value'])
        self.assertEqual(medium.ph, details['pH']['Value'])
BIOLOMICS_BIBLIOGRAPHY = {
"RecordId": 100,
"RecordName": "Miscellaneous notes on Mucoraceae",
"RecordDetails": {
"Associated strains": {
"Value": [],
"FieldType": 118
},
"Associated taxa": {
"Value": [],
"FieldType": 118
},
"Authors": {
"Value": "Schipper, M.A.A.; Samson, R.A.",
"FieldType": 5
},
"Associated sequences": {
"Value": [],
"FieldType": 118
},
"Abstract": {
"Value": "",
"FieldType": 5
},
"Collection": {
"Value": "",
"FieldType": 5
},
"DOI number": {
"Value": "",
"FieldType": 5
},
"Editor(s)": {
"Value": "",
"FieldType": 5
},
"Full reference": {
"Value": "",
"FieldType": 5
},
"Hyperlink": {
"Value": [],
"FieldType": 21
},
"ISBN": {
"Value": "",
"FieldType": 5
},
"ISSN": {
"Value": "",
"FieldType": 5
},
"Issue": {
"Value": "",
"FieldType": 5
},
"Journal": {
"Value": "Mycotaxon",
"FieldType": 5
},
"Journal-Book": {
"Value": "",
"FieldType": 5
},
"Keywords": {
"Value": "",
"FieldType": 5
},
"Page from": {
"Value": "475",
"FieldType": 5
},
"Page to": {
"Value": "491",
"FieldType": 5
},
"Publisher": {
"Value": "",
"FieldType": 5
},
"PubMed ID": {
"Value": "",
"FieldType": 5
},
"Volume": {
"Value": "50",
"FieldType": 5
},
"Year": {
"Value": 1994,
"FieldType": 4
}
}
}
class BibliographySerializerTest(unittest.TestCase):
    """Tests for the publication <-> Biolomics payload serializers."""

    def test_from_biolomics(self):
        """A deserialized publication mirrors the raw record fields."""
        pub = literature_from_biolomics(BIOLOMICS_BIBLIOGRAPHY)
        self.assertEqual(pub.record_id, 100)
        self.assertEqual(pub.record_name, "Miscellaneous notes on Mucoraceae")
        self.assertEqual(pub.authors, "Schipper, M.A.A.; Samson, R.A.")
        self.assertEqual(pub.year, 1994)

    def test_to_biolomics(self):
        """Only the fields that are set end up in RecordDetails."""
        pub = Publication()
        pub.title = 'My title'
        pub.authors = 'me and myself'
        pub.year = 1992
        pub.pubmed_id = '1112222'
        pub.issue = 'issue'
        expected = {
            'RecordName': 'My title',
            'RecordDetails': {
                'Authors': {'FieldType': 'E', 'Value': 'me and myself'},
                'PubMed ID': {'FieldType': 'E', 'Value': '1112222'},
                'Issue': {'FieldType': 'E', 'Value': 'issue'},
                'Year': {'FieldType': 'D', 'Value': 1992}}}
        self.assertDictEqual(expected, literature_to_biolomics(pub))

    def test_to_biolomics2(self):
        """Without a title the record name falls back to PUBMED:/DOI: ids."""
        pub = Publication()
        pub.pubmed_id = '1112222'
        expected = {
            'RecordName': f'PUBMED:{pub.pubmed_id}',
            'RecordDetails': {
                'PubMed ID': {'FieldType': 'E', 'Value': '1112222'}}}
        self.assertDictEqual(expected, literature_to_biolomics(pub))

        pub = Publication()
        pub.doi = 'doi.er/111/12131'
        expected = {
            'RecordName': f'DOI:{pub.doi}',
            'RecordDetails': {
                'DOI number': {'FieldType': 'E', 'Value': pub.doi}}}
        self.assertDictEqual(expected, literature_to_biolomics(pub))
if __name__ == "__main__":
    # Removed a leftover debug filter: the uncommented sys.argv override made
    # a plain run execute only BibliographySerializerTest, silently skipping
    # every other test class in this module.  Kept as a template:
    # import sys; sys.argv = ['', 'BibliographySerializerTest']
    unittest.main()

View File

@ -0,0 +1,156 @@
import unittest
from mirri.biolomics.remote.endoint_names import STRAIN_WS
from .utils import VERSION, SERVER_URL, create_full_data_strain
from mirri.biolomics.settings import CLIENT_ID, SECRET_ID, USERNAME, PASSWORD
from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient
from mirri.biolomics.pipelines.strain import retrieve_strain_by_accession_number
class BiolomicsStrainClientTest(unittest.TestCase):
    """Integration tests for the strain endpoint of the Biolomics client.

    They require network access and valid credentials in
    mirri.biolomics.settings.
    """

    @staticmethod
    def _accession_query(accession_number):
        # Exact-match search on the accession number.  The same query dict
        # was copy-pasted in three tests; it is built here once.
        return {"Query": [{"Index": 0,
                           "FieldName": "Collection accession number",
                           "Operation": "TextExactMatch",
                           "Value": accession_number}],
                "Expression": "Q0",
                "DisplayStart": 0,
                "DisplayLength": 10}

    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def test_retrieve_strain_by_id(self):
        record_id = 14803
        strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
        self.assertEqual(strain.record_id, record_id)

    def test_retrieve_strain_by_name(self):
        record_id = 14803
        record_name = 'MIRRI0014803'
        strain = self.client.retrieve_by_name(STRAIN_WS, record_name)
        self.assertEqual(strain.record_name, record_name)
        self.assertEqual(strain.record_id, record_id)

    def test_search_strain(self):
        """An exact accession-number search returns exactly one strain."""
        accession_number = "BEA 0014B"
        search_response = self.client.search(
            STRAIN_WS, self._accession_query(accession_number))
        self.assertEqual(search_response['total'], 1)
        self.assertEqual(search_response['records'][0].id.strain_id,
                         accession_number)

    def test_search_strain4(self):
        # Cleanup helper disguised as a test: deletes any 'TESTCC 1' strains
        # left behind by previously failed create/update tests.
        accession_number = "TESTCC 1"
        search_response = self.client.search(
            STRAIN_WS, self._accession_query(accession_number))
        for strain in search_response['records']:
            self.client.delete_by_id(STRAIN_WS, strain.record_id)

    def test_search_strain_no_found(self):
        """Searching a non-existing accession number yields an empty result."""
        accession_number = "BEA 0014B_"
        search_response = self.client.search(
            STRAIN_WS, self._accession_query(accession_number))
        self.assertEqual(search_response['total'], 0)
        self.assertFalse(search_response['records'])

    def test_create_strain(self):
        strain = create_full_data_strain()
        strain.taxonomy.interspecific_hybrid = None
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertIsNone(new_strain.taxonomy.interspecific_hybrid)
            self.assertEqual(new_strain.growth.recommended_media, ['AAA'])
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
        finally:
            # Never leave test records in the remote database.
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_update_strain(self):
        strain = create_full_data_strain()
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
            self.assertFalse(new_strain.taxonomy.interspecific_hybrid)
            new_strain.id.number = '2'
            new_strain.taxonomy.interspecific_hybrid = None
            updated_strain = self.client.update(STRAIN_WS, new_strain)
            self.assertEqual(updated_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(updated_strain.taxonomy.interspecific_hybrid)
            # Re-fetch to check the update really reached the server.
            retrieved_strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
            self.assertEqual(retrieved_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(retrieved_strain.taxonomy.interspecific_hybrid)
        finally:
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_update_strain_pathogenicity(self):
        strain = create_full_data_strain()
        record_id = None
        try:
            new_strain = self.client.create(STRAIN_WS, strain)
            record_id = new_strain.record_id
            self.assertEqual(new_strain.id.strain_id, strain.id.strain_id)
            self.assertEqual(new_strain.pathogenicity, 'illness')
            # Clearing a field must survive the update round-trip.
            new_strain.pathogenicity = None
            updated_strain = self.client.update(STRAIN_WS, new_strain)
            self.assertEqual(updated_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(updated_strain.pathogenicity)
            retrieved_strain = self.client.retrieve_by_id(STRAIN_WS, record_id)
            self.assertEqual(retrieved_strain.id.strain_id, new_strain.id.strain_id)
            self.assertIsNone(retrieved_strain.pathogenicity)
        finally:
            if record_id is not None:
                self.client.delete_by_id(STRAIN_WS, record_id)

    def test_search_by_accession_number(self):
        accession_number = "BEA 0014B"
        strain = retrieve_strain_by_accession_number(self.client, accession_number)
        self.assertEqual(strain.id.strain_id, accession_number)

    def test_search_by_accession_number_not_found(self):
        # Renamed: this method previously reused the name of the test above,
        # so Python silently replaced the positive case and it never ran.
        accession_number = "BEA 0014B_"
        strain = retrieve_strain_by_accession_number(self.client, accession_number)
        self.assertFalse(strain)
class BiolomicsClientGrowthMediaTest(unittest.TestCase):
    """Tests for the growth-media endpoint of the Biolomics client."""

    def setUp(self):
        self.client = BiolomicsMirriClient(SERVER_URL, VERSION, CLIENT_ID,
                                           SECRET_ID, USERNAME, PASSWORD)

    def xtest_growth_media_by_name(self):
        # The 'xtest_' prefix keeps this out of unittest discovery.
        growth_medium = self.client.retrieve('growth_media', 'AAA')
        self.assertEqual(growth_medium['Record Id'], 1)
if __name__ == "__main__":
    # Template for running a single test from the command line, e.g.:
    # import sys;sys.argv = ['',
    #                        'BiolomicsWriter.test_mirri_excel_parser_invalid']
    unittest.main()

99
tests/biolomics/utils.py Normal file
View File

@ -0,0 +1,99 @@
from mirri.biolomics.serializers.strain import StrainMirri
from mirri.entities.strain import StrainId, OrganismType
from mirri.entities.sequence import GenomicSequence
from mirri.entities.date_range import DateRange
from mirri.entities.publication import Publication
from mirri.settings import NAGOYA_NO_RESTRICTIONS
# Biolomics web-service API version and the MIRRI *test* server endpoint.
# NOTE(review): SERVER_URL is duplicated in other scripts of this package;
# consider moving it to a single shared settings entry.
VERSION = 'v2'
SERVER_URL = 'https://webservices.bio-aware.com/mirri_test'
def create_full_data_strain():
    """Return a StrainMirri test fixture with a value in (almost) every field.

    Used by the serializer and client tests as a maximal fixture: every
    section (identifiers, taxonomy, deposit, collection, isolation, growth,
    genetics, publications, phenotype) carries data so full round-trips can
    be checked against it.
    """
    strain = StrainMirri()
    # --- identifiers and legal status ---
    strain.id.number = "1"
    strain.id.collection = "TESTCC"
    strain.id.url = "https://cect/2342"
    strain.restriction_on_use = "no_restriction"
    strain.nagoya_protocol = NAGOYA_NO_RESTRICTIONS
    strain.abs_related_files = ['https://example.com']
    strain.mta_files = ['https://example.com']
    strain.other_numbers.append(StrainId(collection="aaa", number="a"))
    strain.other_numbers.append(StrainId(collection="aaa3", number="a3"))
    strain.is_from_registered_collection = False
    strain.risk_group = '1'
    strain.is_potentially_harmful = True
    strain.is_subject_to_quarantine = False
    # --- taxonomy ---
    strain.taxonomy.organism_type = [OrganismType(2)]  # code 2: Archaea
    strain.taxonomy.genus = 'Escherichia'
    strain.taxonomy.species = 'coli'
    strain.taxonomy.interspecific_hybrid = False
    strain.taxonomy.infrasubspecific_name = 'serovar tete'
    strain.taxonomy.comments = 'lalalalla'
    strain.status = "type of Bacillus alcalophilus"
    strain.history = 'firstplave < seconn place < third place'
    # --- deposit ---
    strain.deposit.who = "NCTC, National Collection of Type Cultures - NCTC, London, United Kingdom of Great Britain and Northern Ireland."
    strain.deposit.date = DateRange(year=1985, month=5, day=2)
    strain.catalog_inclusion_date = DateRange(year=1985, month=5, day=2)
    # --- collection event ---
    strain.collect.location.country = "ESP"
    strain.collect.location.state = "una state"
    strain.collect.location.municipality = "one municipality"
    strain.collect.location.longitude = 23.3
    strain.collect.location.latitude = 23.3
    strain.collect.location.altitude = 121
    strain.collect.location.site = "somewhere in the world"
    strain.collect.habitat_ontobiotope = "OBT:000190"
    strain.collect.habitat = 'some habitat'
    strain.collect.who = "the collector"
    strain.collect.date = DateRange(year=1991)
    # --- isolation ---
    strain.isolation.date = DateRange(year=1900)
    strain.isolation.who = 'the isolator'
    strain.isolation.substrate_host_of_isolation = 'some substrate'
    # --- growth conditions ---
    # already existing media in test_mirri
    strain.growth.recommended_temp = {'min': 30, 'max': 30}
    strain.growth.recommended_media = ["AAA"]
    strain.growth.tested_temp_range = {'min': 29, 'max': 32}
    strain.form_of_supply = ["Agar", "Lyo"]
    #strain.other_denominations = ["lajdflasjdldj"]
    # --- genetics ---
    gen_seq = GenomicSequence()
    gen_seq.marker_id = "pepe"
    gen_seq.marker_type = "16S rRNA"
    strain.genetics.markers.append(gen_seq)
    # ploidy 9 -- presumably the project's code for polyploid; TODO confirm
    strain.genetics.ploidy = 9
    strain.genetics.genotype = 'some genotupe'
    strain.genetics.gmo = True
    strain.genetics.gmo_construction = 'instructrion to build'
    strain.genetics.mutant_info = 'x-men'
    strain.genetics.sexual_state = 'MT+A'
    strain.genetics.plasmids = ['asda']
    strain.genetics.plasmids_in_collections = ['asdasda']
    # --- publications and phenotype ---
    pub = Publication()
    pub.title = "The genus Amylomyces"
    strain.publications = [pub]
    strain.plant_pathogenicity_code = 'PATH:001'
    strain.pathogenicity = 'illness'
    strain.enzyme_production = 'some enzimes'
    strain.production_of_metabolites = 'big factory of cheese'
    strain.applications = 'health'
    strain.remarks = 'no remarks for me'
    return strain
if __name__ == '__main__':
    # Manual smoke check: build the fixture and print one populated field.
    strain = create_full_data_strain()
    print(strain.collect.habitat_ontobiotope)

Binary file not shown.

View File

@ -0,0 +1,5 @@
{
"key1": "value1",
"key2": "value2",
"key3": "value3"
}

Binary file not shown.

Binary file not shown.

BIN
tests/data/valid.mirri.xlsx Normal file

Binary file not shown.

318
tests/test_entities.py Normal file
View File

@ -0,0 +1,318 @@
"""
Created on 2 Dec 2020.

@author: peio
"""
import unittest
from mirri.entities.publication import Publication
from mirri.entities.date_range import DateRange
from mirri.entities.location import Location
from mirri.entities.sequence import GenomicSequence
from mirri.entities.strain import (
Collect,
Deposit,
Isolation,
ValidationError,
OrganismType,
Strain,
StrainId,
Taxonomy,
)
from mirri.settings import (
COLLECT,
COUNTRY,
DATE_OF_ISOLATION,
DEPOSIT,
DEPOSITOR,
GENETICS,
GROWTH,
ISOLATED_BY,
ISOLATION,
LOCATION,
MARKERS,
NAGOYA_DOCS_AVAILABLE,
NAGOYA_PROTOCOL,
ORGANISM_TYPE,
OTHER_CULTURE_NUMBERS,
PLOIDY,
RECOMMENDED_GROWTH_MEDIUM,
TAXONOMY,
DATE_OF_INCLUSION, NO_RESTRICTION
)
from mirri.validation.entity_validators import validate_strain
class TestDataRange(unittest.TestCase):
    """Unit tests for the DateRange entity."""

    def test_data_range_init(self):
        # An empty DateRange is falsy and renders as an empty string.
        dr = DateRange()
        self.assertFalse(dr)
        self.assertEqual(str(dr), "")
        self.assertIsNone(dr.range["start"])
        self.assertIsNone(dr.range["end"])

        # Parsing a bare year fills the range and makes it truthy.
        dr.strpdate("2012")
        self.assertEqual(dr.strfdate, "2012----")
        self.assertTrue(dr)
        dr.strpdate("2012----")
        self.assertEqual(dr.strfdate, "2012----")
        dr.strpdate("201212--")
        self.assertEqual(dr.strfdate, "201212--")

        # Month 13 is rejected both when parsing and at construction time.
        with self.assertRaises(ValueError):
            dr.strpdate("201213--")
        with self.assertRaises(ValueError):
            DateRange(year=2012, month=13)

        dr = DateRange(year=2020)
        self.assertEqual(dr.strfdate, "2020----")

        # A year-only range spans the whole year, Jan 1 through Dec 31.
        dr2 = dr.strpdate("2012")
        self.assertEqual(dr2.range["start"].year, 2012)
        self.assertEqual(dr2.range["start"].month, 1)
        self.assertEqual(dr2.range["start"].day, 1)
        self.assertEqual(dr2.range["end"].year, 2012)
        self.assertEqual(dr2.range["end"].month, 12)
        self.assertEqual(dr2.range["end"].day, 31)
class TestCollect(unittest.TestCase):
    """Unit tests for the Collect entity."""

    def test_collect_basic(self):
        collect = Collect()
        self.assertEqual(collect.dict(), {})

        collect.location.country = "ESP"
        collect.date = DateRange().strpdate("2012----")
        collect.who = "pepito"
        # dict() serializes only the fields that were set.
        self.assertEqual(
            dict(collect.dict()),
            {
                "location": {"countryOfOriginCode": "ESP"},
                "collected_by": "pepito",
                "date_of_collection": "2012----",
            },
        )
        # str() resolves the ISO country code to its display name.
        self.assertEqual(str(collect),
                         "Collected: Spain in 2012---- by pepito")
class TestOrganismType(unittest.TestCase):
    """Unit tests for the OrganismType entity."""

    def test_basic_usage(self):
        # Can be built from the numeric code...
        org_type = OrganismType(2)
        self.assertEqual(org_type.name, "Archaea")
        self.assertEqual(org_type.code, 2)

        # Unknown attributes are rejected.
        with self.assertRaises(TypeError):
            org_type.ko = 'a'

        # ... or from the organism type name.  The original test built
        # this instance without asserting anything; assert the reverse
        # mapping shown above (code 2 <-> "Archaea").
        org_type = OrganismType("Archaea")
        self.assertEqual(org_type.code, 2)
class TestTaxonomy(unittest.TestCase):
    """Unit tests for the Taxonomy entity."""

    def test_taxonomy_basic(self):
        # A freshly created taxonomy is empty and falsy.
        empty_taxonomy = Taxonomy()
        self.assertFalse(empty_taxonomy)
        self.assertEqual(empty_taxonomy.dict(), {})

    def test_taxonomy_with_data(self):
        taxonomy = Taxonomy()
        taxonomy.genus = "Bacilus"
        taxonomy.organism_type = [OrganismType("Archaea")]
        taxonomy.species = "vulgaris"
        # long_name is composed from genus and species.
        self.assertEqual(taxonomy.long_name, "Bacilus vulgaris")
class TestLocation(unittest.TestCase):
    """Unit tests for the Location entity."""

    def test_empty_init(self):
        location = Location()
        self.assertFalse(location)
        self.assertEqual(location.dict(), {})

    def test_add_data(self):
        location = Location()
        location.country = "esp"
        self.assertEqual(location.dict(), {COUNTRY: "esp"})
        # Setting a field to None must not add it to the serialization.
        location.state = None
        self.assertEqual(location.dict(), {COUNTRY: "esp"})
class TestStrain(unittest.TestCase):
    """Unit tests for the Strain entity and its serialization."""

    def test_empty_strain(self):
        strain = Strain()
        self.assertEqual(strain.dict(), {})

    def test_strain_add_data(self):
        strain = Strain()
        strain.id.number = "5433"
        strain.id.collection = "CECT"
        strain.id.url = "https://cect/2342"

        # Only controlled-vocabulary values are accepted for Nagoya.
        with self.assertRaises(ValidationError):
            strain.nagoya_protocol = "asdas"
        strain.nagoya_protocol = NAGOYA_DOCS_AVAILABLE
        # Bug fix: the original assigned into the dict returned by
        # dict() (a no-op) instead of asserting the serialized value.
        self.assertEqual(strain.dict()[NAGOYA_PROTOCOL],
                         NAGOYA_DOCS_AVAILABLE)

        strain.collect.location.country = "ESP"
        self.assertEqual(strain.dict()[COLLECT][LOCATION][COUNTRY], "ESP")

        strain.genetics.ploidy = 9
        self.assertEqual(strain.dict()[GENETICS][PLOIDY], 9)

        strain.growth.recommended_media = ["asd"]
        strain.isolation.date = DateRange(year=1900)
        self.assertEqual(strain.dict()[ISOLATION][DATE_OF_ISOLATION],
                         "1900----")

        strain.deposit.who = "pepe"
        self.assertEqual(strain.dict()[DEPOSIT][DEPOSITOR], "pepe")

        strain.growth.recommended_media = ["11"]
        self.assertEqual(strain.dict()[GROWTH][RECOMMENDED_GROWTH_MEDIUM],
                         ["11"])

        # Organism type can be set by numeric code or by name.
        strain.taxonomy.organism_type = [OrganismType(2)]
        self.assertEqual(strain.dict()[TAXONOMY][ORGANISM_TYPE],
                         [{"code": 2, "name": "Archaea"}])
        strain.taxonomy.organism_type = [OrganismType("Algae")]
        self.assertEqual(strain.dict()[TAXONOMY][ORGANISM_TYPE],
                         [{"code": 1, "name": "Algae"}])

        strain.other_numbers.append(StrainId(collection="aaa", number="a"))
        strain.other_numbers.append(StrainId(collection="aaa3", number="a3"))
        self.assertEqual(
            strain.dict()[OTHER_CULTURE_NUMBERS],
            [
                {"collection_code": "aaa", "accession_number": "a"},
                {"collection_code": "aaa3", "accession_number": "a3"},
            ],
        )

        strain.form_of_supply = ["Agar", "Lyo"]

        gen_seq = GenomicSequence()
        self.assertEqual(gen_seq.dict(), {})
        gen_seq.marker_id = "pepe"
        gen_seq.marker_type = "16S rRNA"
        strain.genetics.markers.append(gen_seq)
        self.assertEqual(
            strain.dict()[GENETICS][MARKERS],
            [{"marker_type": "16S rRNA", "INSDC": "pepe"}],
        )

        # Ontobiotope terms must match the OBT:NNNNNN pattern; a short
        # identifier is rejected.
        strain.collect.habitat_ontobiotope = "OBT:111111"
        self.assertEqual(strain.collect.habitat_ontobiotope, "OBT:111111")
        with self.assertRaises(ValidationError):
            strain.collect.habitat_ontobiotope = "OBT:11111"

        # publications must be a list of Publication instances.
        with self.assertRaises(ValidationError):
            strain.publications = 1
        pub = Publication()
        pub.id = "1"
        with self.assertRaises(ValidationError):
            strain.publications = pub
        strain.publications = [pub]
        self.assertEqual(strain.publications[0].id, "1")

        strain.catalog_inclusion_date = DateRange(year=1992)
        self.assertEqual(strain.dict()[DATE_OF_INCLUSION], '1992----')

    def test_strain_validation(self):
        strain = Strain()
        strain.form_of_supply = ['Lyo']
        # The original disabled the rest of this test with a bare
        # "return", so it silently passed.  Skip explicitly instead so
        # the disabled state shows up in the test report.
        # TODO(review): re-enable once validate_strain error counts are
        # confirmed.
        self.skipTest('validation assertions disabled pending review')

        errors = validate_strain(strain)
        self.assertEqual(len(errors), 10)

        strain.id.collection = 'test'
        strain.id.number = '1'
        errors = validate_strain(strain)
        self.assertEqual(len(errors), 9)

        strain.nagoya_protocol = NAGOYA_DOCS_AVAILABLE
        strain.restriction_on_use = NO_RESTRICTION
        strain.risk_group = 1
        strain.taxonomy.organism_type = [OrganismType(4)]
        strain.taxonomy.hybrids = ['Sac lac', 'Sac lcac3']
        strain.growth.recommended_media = ['aa']
        strain.growth.recommended_temp = {'min': 2, 'max': 5}
        strain.form_of_supply = ['lyo']
        strain.collect.location.country = 'ESP'
        errors = validate_strain(strain)
        self.assertFalse(errors)
class TestIsolation(unittest.TestCase):
    """Unit tests for the Isolation entity."""

    def test_iniatialize_isollation(self):
        isolation = Isolation()
        self.assertEqual(isolation.dict(), {})

        isolation.who = "pepito"
        self.assertIn(ISOLATED_BY, isolation.dict())

        isolation.date = DateRange().strpdate("2012----")
        self.assertIn(DATE_OF_ISOLATION, isolation.dict())

        # Setting a site on the isolation location is rejected.
        with self.assertRaises((ValueError, AttributeError)):
            isolation.location.site = "spain"
class TestGenomicSequence(unittest.TestCase):
    """Unit tests for the GenomicSequence entity."""

    def test_empty_init(self):
        sequence = GenomicSequence()
        self.assertEqual(sequence.dict(), {})

        sequence.marker_id = "pepe"
        sequence.marker_type = "16S rRNA"
        expected = {"marker_type": "16S rRNA", "INSDC": "pepe"}
        self.assertEqual(sequence.dict(), expected)
if __name__ == "__main__":
# import sys;sys.argv = ['', 'TestStrain']
unittest.main()

51
tests/test_parsers.py Normal file
View File

@ -0,0 +1,51 @@
from mirri.entities.strain import ValidationError
import unittest
from pathlib import Path
from pprint import pprint
from mirri.io.parsers.mirri_excel import parse_mirri_excel
TEST_DATA_DIR = Path(__file__).parent / "data"
class MirriExcelTests(unittest.TestCase):
    """Tests for the MIRRI excel parser."""

    def test_mirri_excel_parser(self):
        in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            parsed_data = parse_mirri_excel(fhand, version="20200601")

        # Growth media keep their acronym and description.
        medium = parsed_data["growth_media"][0]
        self.assertEqual("1", medium.acronym)
        self.assertEqual(medium.description, "NUTRIENT BROTH/AGAR I")

        strains = list(parsed_data["strains"])
        strain = strains[0]
        self.assertEqual(strain.publications[0].id, 1)
        self.assertEqual(strain.publications[0].title, 'Cosa')
        self.assertEqual(strain.id.number, "1")

    def xtest_mirri_excel_parser_invalid_fail(self):
        # Disabled (xtest): parsing an invalid workbook must raise.
        in_path = TEST_DATA_DIR / "invalid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            with self.assertRaises(ValidationError):
                parse_mirri_excel(fhand, version="20200601")

    def xtest_mirri_excel_parser_invalid(self):
        # Disabled (xtest): inspect the errors reported for an invalid
        # workbook.
        in_path = TEST_DATA_DIR / "invalid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            parsed_data = parse_mirri_excel(fhand, version="20200601")
        errors = parsed_data["errors"]
        for _id, _errors in errors.items():
            print(_id, _errors)
if __name__ == "__main__":
# import sys;sys.argv = ['',
# 'MirriExcelTests.test_mirri_excel_parser_invalid']
unittest.main()

589
tests/test_validation.py Normal file
View File

@ -0,0 +1,589 @@
from datetime import datetime
import unittest
from pathlib import Path
from itertools import chain
from mirri.validation.tags import (
CHOICES,
COORDINATES,
CROSSREF,
CROSSREF_NAME,
DATE,
MATCH,
MISSING,
MULTIPLE,
NUMBER,
REGEXP,
SEPARATOR,
TAXON,
TYPE,
UNIQUE,
VALUES
)
from mirri.validation.excel_validator import (
is_valid_choices,
is_valid_coords,
is_valid_crossrefs,
is_valid_date,
is_valid_missing,
is_valid_number,
is_valid_regex,
is_valid_taxon,
is_valid_unique,
is_valid_file,
validate_mirri_excel,
)
TEST_DATA_DIR = Path(__file__).parent / "data"
TS_VALUE = "value"
TS_CONF = "conf"
TS_ASSERT = "assert_func"
class MirriExcelValidationTests(unittest.TestCase):
    """End-to-end validation tests over whole MIRRI excel workbooks."""

    def _collect_errors(self, file_name):
        """Validate a workbook; return (entities, error codes) reported."""
        in_path = TEST_DATA_DIR / file_name
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)
        entities = []
        err_codes = []
        for entity, errors in error_log.get_errors().items():
            entities.append(entity)
            err_codes.extend(error.code for error in errors)
        return entities, err_codes

    def test_validation_structure(self):
        entities, err_codes = self._collect_errors(
            "invalid_structure.mirri.xlsx")
        for entity in ("EFS", "STD", "GOD", "GMD"):
            self.assertIn(entity, entities)
        for code in ("EFS03", "EFS06", "EFS08", "GOD06", "GMD01",
                     "STD05", "STD08", "STD12"):
            self.assertIn(code, err_codes)

    def test_validation_content(self):
        entities, err_codes = self._collect_errors(
            "invalid_content.mirri.xlsx")
        self.assertTrue(err_codes)
        # Structural (EFS) errors must not appear for a well-formed file.
        self.assertNotIn("EFS", entities)
        for entity in ("STD", "GOD", "GID"):
            self.assertIn(entity, entities)
        for code in ("GOD04", "GOD07", "GID03", "STD11", "STD15", "STD22",
                     "STD04", "STD10", "STD07", "STD14", "STD16"):
            self.assertIn(code, err_codes)

    def test_validation_valid(self):
        in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
        with in_path.open("rb") as fhand:
            error_log = validate_mirri_excel(fhand)
        self.assertEqual(len(error_log.get_errors()), 0)
class ValidatoionFunctionsTest(unittest.TestCase):
    """Unit tests for the per-cell validation functions.

    Every test method builds a table of (value, configuration,
    expected_valid) cases and delegates to _run_cases, which checks the
    validator's verdict for each case under a subTest.  The original
    repeated the identical runner loop in every method.
    """

    def _run_cases(self, validator, cases):
        """Assert validator(value, conf) is truthy iff expected_valid."""
        for value, conf, expected_valid in cases:
            with self.subTest(value=value):
                if expected_valid:
                    self.assertTrue(validator(value, conf))
                else:
                    self.assertFalse(validator(value, conf))

    def test_is_valid_regex(self):
        alpha = {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"}
        digits = {TYPE: REGEXP, MATCH: r"\d+"}
        words = {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"}
        cases = [
            ("abcDEF", alpha, True),
            ("123456", alpha, False),
            ("123456", digits, True),
            ("abcdef", digits, False),
            ("abc 123", words, True),
            ("123 abc", words, True),
            ("123 ", words, False),  # trailing whitespace breaks the match
        ]
        self._run_cases(is_valid_regex, cases)

    def test_is_valid_choices(self):
        cases = [
            ("1", {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]}, True),
            ("1, 3",
             {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"],
              MULTIPLE: True, SEPARATOR: ","},
             True),
            ("5", {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]}, False),
        ]
        self._run_cases(is_valid_choices, cases)

    def test_is_valid_crossref(self):
        def crossref_conf(multiple=False):
            # Build a fresh conf per case, as the original did.
            conf = {
                TYPE: CROSSREF,
                CROSSREF_NAME: "values",
                "crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
            }
            if multiple:
                conf[MULTIPLE] = True
                conf[SEPARATOR] = ","
            return conf

        cases = [
            ("abc", crossref_conf(), True),
            ("123", crossref_conf(), False),
            ("abc, def", crossref_conf(multiple=True), True),
            ("abc, 123", crossref_conf(multiple=True), False),
        ]
        self._run_cases(is_valid_crossrefs, cases)

    def test_is_valid_missing(self):
        cases = [
            (1, {TYPE: MISSING}, True),
            ("abc", {TYPE: MISSING}, True),
            (None, {TYPE: MISSING}, False),
        ]
        self._run_cases(is_valid_missing, cases)

    def test_is_valid_date(self):
        conf = {TYPE: DATE}
        cases = [
            ('2020-04-07', conf, True),
            ('2020/04/07', conf, True),
            (datetime(2021, 5, 1), conf, True),
            ('2020-05', conf, True),
            ('2020/05', conf, True),
            (2020, conf, True),
            ('2021 05 01', conf, False),  # spaces are not a separator
            ('04-07-2020', conf, False),  # day-first order rejected
            ('2021-02-31', conf, False),  # impossible calendar date
            ('2021-15', conf, False),     # month out of range
            ('15-2021', conf, False),
            (3000, conf, False),          # year outside accepted range
            (-2020, conf, False),
        ]
        self._run_cases(is_valid_date, cases)

    def test_is_valid_coordinates(self):
        conf = {TYPE: COORDINATES}
        cases = [
            ("23; 50", conf, True),
            ("-90; -100", conf, True),
            ("90; 100", conf, True),
            ("0; 0", conf, True),
            ("10; 20; 5", conf, True),   # optional third component
            ("10; 20; -5", conf, True),
            ("91; 50", conf, False),     # latitude out of range
            ("87; 182", conf, False),    # longitude out of range
            ("-200; 182", conf, False),
            ("20, 40", conf, False),     # wrong separator
            ("abc def", conf, False),
            (123, conf, False),
        ]
        self._run_cases(is_valid_coords, cases)

    def test_is_valid_number(self):
        cases = [
            (1, {TYPE: NUMBER}, True),
            (2.5, {TYPE: NUMBER}, True),
            ("10", {TYPE: NUMBER}, True),
            ("10.5", {TYPE: NUMBER}, True),
            (5, {TYPE: NUMBER, "min": 0}, True),
            (5, {TYPE: NUMBER, "max": 10}, True),
            (5, {TYPE: NUMBER, "min": 0, "max": 10}, True),
            ("hello", {TYPE: NUMBER}, False),
            (10, {TYPE: NUMBER, "max": 5}, False),
            (0, {TYPE: NUMBER, "min": 5}, False),
        ]
        self._run_cases(is_valid_number, cases)

    def test_is_valid_taxon(self):
        conf = {TYPE: TAXON}
        cases = [
            ('sp. species', conf, True),
            ('spp species subsp. subspecies', conf, True),
            ('spp species subsp. subspecies var. variety', conf, True),
            ('spp taxon', conf, True),
            ('Candidaceae', conf, True),
            ('sp sp species', conf, False),
            ('spp species abc. def', conf, False),  # unknown rank marker
        ]
        self._run_cases(is_valid_taxon, cases)

    def test_is_valid_unique(self):
        # Each case gets its own conf dict: shown_values presumably
        # records the values already seen, so sharing one would couple
        # the cases (matches the original's per-case dicts).
        cases = [
            ("abc",
             {TYPE: UNIQUE, "label": "values", "shown_values": {}},
             True),
            ("jkl",
             {TYPE: UNIQUE, "label": "values",
              "shown_values": {"values": {"abc": '', "def": '',
                                          "ghi": ''}}},
             True),
            ("abc",
             {TYPE: UNIQUE, "label": "values",
              "shown_values": {"values": {"abc": '', "def": '',
                                          "ghi": ''}}},
             False),
        ]
        self._run_cases(is_valid_unique, cases)

    def test_is_valid_file(self):
        # is_valid_file takes no configuration, so it is checked directly.
        cases = [
            (TEST_DATA_DIR / "invalid_structure.mirri.xlsx", True),
            (TEST_DATA_DIR / "invalid_excel.mirri.json", False),
        ]
        for path, expected_valid in cases:
            with self.subTest(value=path):
                if expected_valid:
                    self.assertTrue(is_valid_file(path))
                else:
                    self.assertFalse(is_valid_file(path))
if __name__ == "__main__":
import sys
# sys.argv = ['',
# 'ValidatoionFunctionsTest.test_is_valid_regex']
unittest.main()

24
tests/test_writers.py Normal file
View File

@ -0,0 +1,24 @@
import tempfile
import unittest
from pathlib import Path

from mirri.io.parsers.mirri_excel import parse_mirri_excel
from mirri.io.writers.mirri_excel import write_mirri_excel
TEST_DATA_DIR = Path(__file__).parent / "data"
class MirriExcelTests(unittest.TestCase):
    """Round-trip test: parse a MIRRI excel workbook and write it back."""

    def test_valid_excel(self):
        in_path = TEST_DATA_DIR / "valid.mirri.full.xlsx"
        # Close the input handle deterministically (the original opened
        # it without ever closing it).
        with in_path.open('rb') as fhand:
            parsed_data = parse_mirri_excel(fhand, version="20200601")
        strains = parsed_data["strains"]
        growth_media = parsed_data["growth_media"]

        # Write into a temporary directory instead of a hard-coded /tmp
        # path, so the test is portable and cleans up after itself.
        with tempfile.TemporaryDirectory() as tmp_dir:
            out_path = Path(tmp_dir) / "test.xlsx"
            write_mirri_excel(out_path, strains, growth_media,
                              version="20200601")
if __name__ == "__main__":
# import sys;sys.argv = ['',
# 'BiolomicsWriter.test_mirri_excel_parser_invalid']
unittest.main()