forked from MIRRI/mirri_utils
18-07
This commit is contained in:
parent
37b2bbce98
commit
2370686d72
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -19,12 +19,6 @@ from mirri.settings import (COMMERCIAL_USE_WITH_AGREEMENT, GENOMIC_INFO,
|
||||
NAGOYA_PROBABLY_SCOPE, NO_RESTRICTION,
|
||||
ONLY_RESEARCH, ONTOBIOTOPE,
|
||||
PUBLICATION_FIELDS, STRAINS, SUBTAXAS)
|
||||
from mirri.settings_v1 import (COMMERCIAL_USE_WITH_AGREEMENT, GENOMIC_INFO,
|
||||
GROWTH_MEDIA, LITERATURE_SHEET, LOCATIONS,
|
||||
MIRRI_FIELDS, NAGOYA_DOCS_AVAILABLE, NAGOYA_NO_RESTRICTIONS,
|
||||
NAGOYA_PROBABLY_SCOPE, NO_RESTRICTION,
|
||||
ONLY_RESEARCH, ONTOBIOTOPE,
|
||||
PUBLICATION_FIELDS, STRAINS, SUBTAXAS)
|
||||
from mirri.utils import get_country_from_name
|
||||
|
||||
RESTRICTION_USE_TRANSLATOR = {
|
||||
@ -44,34 +38,12 @@ TRUEFALSE_TRANSLATOR = {
|
||||
|
||||
|
||||
def parse_mirri_excel(fhand, version=""):
    """Parse a MIRRI Excel workbook with the parser matching ``version``.

    Args:
        fhand: File-like object holding the workbook; it is forwarded
            unchanged to the version-specific parser.
        version: Identifier of the specification the workbook follows.
            ``"20200601"`` selects ``_parse_mirri_v20200601``; the historical
            spelling ``"20200602"`` is still accepted for that parser so
            existing callers keep working. ``"12052023"`` and ``"5.1.2"``
            both select ``_parse_mirri_v12052023``.

    Returns:
        Whatever the selected version-specific parser returns.

    Raises:
        NotImplementedError: If ``version`` is not one of the values above.
    """
    # "20200602" was the value originally tested here even though the parser
    # and the error message are named 20200601; accept both spellings.
    if version in ("20200601", "20200602"):
        return _parse_mirri_v20200601(fhand)
    if version in ("12052023", "5.1.2"):
        return _parse_mirri_v12052023(fhand)
    raise NotImplementedError(
        "Only versions 20200601, 12052023 and 5.1.2 are implemented")
|
||||
|
||||
|
||||
def _parse_mirri_v20200601(fhand):
    """Read a 20200601-layout workbook and return its strains and media.

    The handle is rewound and its whole content is copied into memory before
    the workbook is opened read-only with cached cell values
    (``data_only=True``).

    Args:
        fhand: Seekable binary file-like object containing the workbook.

    Returns:
        dict: ``{"strains": ..., "growth_media": ...}`` where ``strains`` is
        the result of ``parse_strains`` and ``growth_media`` is a list built
        from ``parse_growth_media``.
    """
    fhand.seek(0)
    in_memory_file = BytesIO(fhand.read())
    workbook = load_workbook(filename=in_memory_file, read_only=True,
                             data_only=True)

    # Auxiliary sheets are read first; parse_strains receives all of them.
    location_rows = workbook_sheet_reader(workbook, LOCATIONS)
    ontobiotope_rows = workbook_sheet_reader(workbook, ONTOBIOTOPE)
    media = list(parse_growth_media(workbook))
    marker_rows = workbook_sheet_reader(workbook, GENOMIC_INFO)
    publication_list = list(parse_publications(workbook))

    parsed_strains = parse_strains(
        workbook,
        locations=location_rows,
        growth_media=media,
        markers=marker_rows,
        publications=publication_list,
        ontobiotopes=ontobiotope_rows,
    )

    return {"strains": parsed_strains, "growth_media": media}
|
||||
|
||||
def _parse_mirri_v12052023(fhand):
|
||||
fhand.seek(0)
|
||||
file_content = BytesIO(fhand.read())
|
||||
|
||||
@ -5,7 +5,6 @@ from openpyxl.workbook.workbook import Workbook
|
||||
|
||||
from mirri import rgetattr
|
||||
from mirri.settings import GROWTH_MEDIA, MIRRI_FIELDS, DATA_DIR, PUBLICATION_FIELDS
|
||||
from mirri.settings_v1 import GROWTH_MEDIA, MIRRI_FIELDS, DATA_DIR, PUBLICATION_FIELDS
|
||||
from mirri.io.parsers.mirri_excel import NAGOYA_TRANSLATOR, RESTRICTION_USE_TRANSLATOR
|
||||
|
||||
INITIAL_SEXUAL_STATES = [
|
||||
@ -51,81 +50,9 @@ PUB_HEADERS = [pb["label"] for pb in PUBLICATION_FIELDS]
|
||||
|
||||
|
||||
def write_mirri_excel(path, strains, growth_media, version):
    """Write strains and growth media to ``path`` in the requested layout.

    Args:
        path: Destination of the workbook; forwarded to the version-specific
            writer.
        strains: Strains to serialize; forwarded unchanged.
        growth_media: Growth media to serialize; forwarded unchanged.
        version: ``"20200601"`` selects ``_write_mirri_excel_20200601``;
            ``"12052023"`` and ``"5.1.2"`` both select
            ``_write_mirri_excel_12052023``.

    Raises:
        NotImplementedError: If ``version`` is not supported. Previously an
            unknown version returned silently without writing any file.
    """
    if version == "20200601":
        _write_mirri_excel_20200601(path, strains, growth_media)
    elif version in ("12052023", "5.1.2"):
        _write_mirri_excel_12052023(path, strains, growth_media)
    else:
        raise NotImplementedError(
            "Only versions 20200601, 12052023 and 5.1.2 are implemented")
|
||||
|
||||
|
||||
def _write_mirri_excel_20200601(path, strains, growth_media):
    """Serialize strains and growth media into a 20200601-layout workbook.

    The markers, ontobiotope and growth-media sheets are written by their
    helpers first. The strains are then deserialized into rows, and the
    "Strains", "Geographic origin", "Literature", "Sexual state" and
    "Genomic information" sheets are appended in that order. The sheet named
    "Sheet" is deleted before the workbook is saved to ``path``.

    Args:
        path: Destination file; converted with ``str()`` before saving.
        strains: Strains handed to ``_deserialize_strains``.
        growth_media: Iterable of media objects exposing ``acronym``.
    """
    workbook = Workbook()

    write_markers_sheet(workbook)
    write_ontobiotopes(workbook, DATA_DIR / "ontobiotopes.csv")
    write_growth_media(workbook, growth_media)
    media_acronyms = [str(medium.acronym) for medium in growth_media]

    # These containers start empty and are passed to _deserialize_strains;
    # presumably it fills them while producing the strain rows, which is why
    # the rows are materialized before any dependent sheet is written.
    locations = {}
    publications = {}
    sexual_states = set(deepcopy(INITIAL_SEXUAL_STATES))
    genomic_markers = {}
    strain_rows = list(
        _deserialize_strains(strains, locations, media_acronyms,
                             publications, sexual_states, genomic_markers))

    # Strains
    strains_ws = workbook.create_sheet("Strains")
    strains_ws.append([field["label"] for field in MIRRI_FIELDS])
    for strain_row in strain_rows:
        strains_ws.append(strain_row)
    redimension_cell_width(strains_ws)

    # Geographic origin: the dictionary key is written in the last column.
    locations_ws = workbook.create_sheet("Geographic origin")
    locations_ws.append(["ID", "Country", "Region", "City", "Locality"])
    for position, (loc_key, location) in enumerate(locations.items()):
        locations_ws.append([position, location.country, location.state,
                             location.municipality, loc_key])
    redimension_cell_width(locations_ws)

    # Literature: one column per PUBLICATION_FIELDS entry, None when absent.
    literature_ws = workbook.create_sheet("Literature")
    literature_ws.append(PUB_HEADERS)
    for publication in publications.values():
        literature_ws.append([getattr(publication, field['attribute'], None)
                              for field in PUBLICATION_FIELDS])
    redimension_cell_width(literature_ws)

    # Sexual state: one value per row, in sorted order.
    sexual_state_ws = workbook.create_sheet("Sexual state")
    for state in sorted(sexual_states):
        sexual_state_ws.append([state])
    redimension_cell_width(sexual_state_ws)

    # Genomic information: one row per (strain, marker) pair.
    genomic_ws = workbook.create_sheet("Genomic information")
    genomic_ws.append(['Strain AN', 'Marker', 'INSDC AN', 'Sequence'])
    for strain_id, strain_markers in genomic_markers.items():
        for marker in strain_markers:
            genomic_ws.append([strain_id, marker.marker_type,
                               marker.marker_id, marker.marker_seq])
    redimension_cell_width(genomic_ws)

    del workbook["Sheet"]
    workbook.save(str(path))
|
||||
|
||||
|
||||
def _write_mirri_excel_12052023(path, strains, growth_media):
|
||||
wb = Workbook()
|
||||
|
||||
|
||||
311
settings_v1.py
311
settings_v1.py
@ -1,311 +0,0 @@
|
||||
from pathlib import Path
|
||||
|
||||
from charset_normalizer import VERSION
|
||||
|
||||
DATA_DIR = Path(__file__).parent / "data"
|
||||
|
||||
ACCESSION_NUMBER = "accession_number"
|
||||
RESTRICTION_ON_USE = "restriction_on_use"
|
||||
NAGOYA_PROTOCOL = "nagoya_protocol"
|
||||
ABS_RELATED_FILES = "abs_related_files"
|
||||
MTA_FILES = "mta_file"
|
||||
OTHER_CULTURE_NUMBERS = "other_culture_collection_numbers"
|
||||
STRAIN_FROM_REGISTERED_COLLECTION = "strain_from_a_registered_collection"
|
||||
RISK_GROUP = "risk_group"
|
||||
DUAL_USE = "dual_use"
|
||||
QUARANTINE = "quarantine"
|
||||
ORGANISM_TYPE = "organism_type"
|
||||
TAXON_NAME = "taxon_name"
|
||||
INFRASUBSPECIFIC_NAME = "infrasubspecific_names"
|
||||
COMMENTS_ON_TAXONOMY = "comments_on_taxonomy"
|
||||
STATUS = "status"
|
||||
HISTORY_OF_DEPOSIT = "history_of_deposit"
|
||||
DEPOSITOR = "depositor"
|
||||
DATE_OF_DEPOSIT = "date_of_deposit"
|
||||
COLLECTED_BY = "collected_by"
|
||||
DATE_OF_COLLECTION = "date_of_collection"
|
||||
ISOLATED_BY = "isolated_by"
|
||||
DATE_OF_ISOLATION = "date_of_isolation"
|
||||
DATE_OF_INCLUSION = "date_of_inclusion_on_catalog"
|
||||
TESTED_TEMPERATURE_GROWTH_RANGE = "tested_temperature_growth_range"
|
||||
RECOMMENDED_GROWTH_TEMP = "recommended_growth_temperature"
|
||||
RECOMMENDED_GROWTH_MEDIUM = "recommended_media_for_growth"
|
||||
FORM_OF_SUPPLY = "form_of_supply"
|
||||
GEO_COORDS = "coordinates_of_geographic_origin"
|
||||
ACCESSION_NAME = "other_denomination"
|
||||
ALTITUDE = "altitude_of_geographic_origin"
|
||||
GEOGRAPHIC_ORIGIN = "geographic_origin"
|
||||
GMO = "gmo"
|
||||
GMO_CONSTRUCTION_INFO = "gmo_construction_information"
|
||||
MUTANT_INFORMATION = "mutant_information"
|
||||
GENOTYPE = "genotype"
|
||||
LITERATURE = "literature"
|
||||
SEXUAL_STATE = "sexual_state"
|
||||
PLOIDY = "ploidy"
|
||||
INTERSPECIFIC_HYBRID = "interspecific_hybrid"
|
||||
HYBRIDS = 'hybrids'
|
||||
PLANT_PATHOGENICITY_CODE = "plant_pathogenicity_code"
|
||||
PATHOGENICITY = "pathogenicity"
|
||||
ENZYME_PRODUCTION = "enzyme_production"
|
||||
PRODUCTION_OF_METABOLITES = "production_of_metabolites"
|
||||
APPLICATIONS = "applications"
|
||||
REMARKS = "remarks"
|
||||
PLASMIDS = "plasmids"
|
||||
PLASMIDS_COLLECTION_FIELDS = "plasmids_collections_fields"
|
||||
SUBSTRATE_HOST_OF_ISOLATION = "substrate_host_of_isolation"
|
||||
ISOLATION_HABITAT = "isolation_habitat"
|
||||
ONTOBIOTOPE_ISOLATION_HABITAT = "ontobiotope_term_for_the_isolation_habitat"
|
||||
LITERATURE_LINKED_TO_SEQ_GENOME = "literature_linked_to_the_sequence_genome"
|
||||
|
||||
# StrainId
|
||||
STRAIN_ID = "id"
|
||||
COLLECTION_CODE = "collection_code"
|
||||
STRAIN_PUI = "strain_pui"
|
||||
STRAIN_URL = "strain_url"
|
||||
|
||||
ID_SYNONYMS = 'id_synonyms'
|
||||
# Taxonomy
|
||||
GENUS = "genus"
|
||||
SPECIES = "species"
|
||||
|
||||
# Location
|
||||
COUNTRY = "countryOfOriginCode"
|
||||
SITE = "site"
|
||||
STATE = "state"
|
||||
PROVINCE = "province"
|
||||
MUNICIPALITY = "municipality"
|
||||
ISLAND = "island"
|
||||
OTHER = "other"
|
||||
LATITUDE = "latitude"
|
||||
LONGITUDE = "longitude"
|
||||
ALTITUDE = "altitude"
|
||||
GEOREF_METHOD = "georeferencingMethod"
|
||||
COORDUNCERTAINTY = "coordUncertainty"
|
||||
COORD_SPATIAL_REFERENCE = "coordenatesSpatialReference"
|
||||
LOCATION = "location"
|
||||
|
||||
ALLOWED_COLLECTING_SITE_KEYS = [
|
||||
COUNTRY,
|
||||
STATE,
|
||||
PROVINCE,
|
||||
ISLAND,
|
||||
MUNICIPALITY,
|
||||
OTHER,
|
||||
SITE,
|
||||
LATITUDE,
|
||||
LONGITUDE,
|
||||
ALTITUDE,
|
||||
GEOREF_METHOD,
|
||||
COORDUNCERTAINTY,
|
||||
COORD_SPATIAL_REFERENCE,
|
||||
]
|
||||
|
||||
MIRRI_FIELDS = [
|
||||
{"attribute": "id", "label": "Accession number"},
|
||||
{"attribute": "restriction_on_use", "label": "Restrictions on use"},
|
||||
{"attribute": "nagoya_protocol",
|
||||
"label": "Nagoya protocol restrictions and compliance conditions"},
|
||||
{"attribute": ABS_RELATED_FILES, "label": "ABS related files"},
|
||||
{"attribute": "mta_files", "label": "MTA file"},
|
||||
{"attribute": "other_numbers", "label": "Other culture collection numbers"},
|
||||
{"attribute": "is_from_registered_collection",
|
||||
"label": "Strain from a registered collection"},
|
||||
{"attribute": "risk_group", "label": "Risk Group"},
|
||||
{"attribute": "is_potentially_harmful", "label": "Dual use"},
|
||||
{"attribute": "is_subject_to_quarantine", "label": "Quarantine in Europe"},
|
||||
{"attribute": "taxonomy.organism_type", "label": "Organism type"},
|
||||
{"attribute": "taxonomy.taxon_name", "label": "Taxon name"},
|
||||
{"attribute": "taxonomy.infrasubspecific_name",
|
||||
"label": "Infrasubspecific names"},
|
||||
{"attribute": "taxonomy.comments", "label": "Comment on taxonomy"},
|
||||
{"attribute": "taxonomy.interspecific_hybrid",
|
||||
"label": "Interspecific hybrid"},
|
||||
{"attribute": "status", "label": "Status"},
|
||||
{"attribute": "history", "label": "History of deposit", },
|
||||
{"attribute": "deposit.who", "label": "Depositor"},
|
||||
{"attribute": "deposit.date", "label": "Date of deposit"},
|
||||
{"attribute": "catalog_inclusion_date",
|
||||
"label": "Date of inclusion in the catalogue"},
|
||||
{"attribute": "collect.who", "label": "Collected by"},
|
||||
{"attribute": "collect.date", "label": "Date of collection"},
|
||||
{"attribute": "isolation.who", "label": "Isolated by"},
|
||||
{"attribute": "isolation.date", "label": "Date of isolation"},
|
||||
{"attribute": "isolation.substrate_host_of_isolation",
|
||||
"label": "Substrate/host of isolation"},
|
||||
{"attribute": "growth.tested_temp_range",
|
||||
"label": "Tested temperature growth range"},
|
||||
{"attribute": "growth.recommended_temp",
|
||||
"label": "Recommended growth temperature"},
|
||||
{"attribute": "growth.recommended_media",
|
||||
"label": "Recommended medium for growth"},
|
||||
{"attribute": "form_of_supply", "label": "Form of supply"},
|
||||
{"attribute": "other_denominations", "label": "Other denomination"},
|
||||
{"attribute": "collect.location.coords",
|
||||
"label": "Coordinates of geographic origin"},
|
||||
{"attribute": "collect.location.altitude",
|
||||
"label": "Altitude of geographic origin"},
|
||||
{"attribute": "collect.location", "label": "Geographic origin"},
|
||||
{"attribute": "collect.habitat", "label": "Isolation habitat"},
|
||||
{"attribute": "collect.habitat_ontobiotope",
|
||||
"label": "Ontobiotope term for the isolation habitat"},
|
||||
{"attribute": "genetics.gmo", "label": "GMO"},
|
||||
{"attribute": "genetics.gmo_construction",
|
||||
"label": "GMO construction information"},
|
||||
{"attribute": "genetics.mutant_info", "label": "Mutant information"},
|
||||
{"attribute": "genetics.genotype", "label": "Genotype"},
|
||||
{"attribute": "genetics.sexual_state", "label": "Sexual state"},
|
||||
{"attribute": "genetics.ploidy", "label": "Ploidy"},
|
||||
{"attribute": "genetics.plasmids", "label": "Plasmids"},
|
||||
{"attribute": "genetics.plasmids_in_collections",
|
||||
"label": "Plasmids collections fields"},
|
||||
{"attribute": "publications", "label": "Literature"},
|
||||
{"attribute": PLANT_PATHOGENICITY_CODE, "label": "Plant pathogenicity code"},
|
||||
{"attribute": "pathogenicity", "label": "Pathogenicity"},
|
||||
{"attribute": "enzyme_production", "label": "Enzyme production"},
|
||||
{"attribute": "production_of_metabolites",
|
||||
"label": "Production of metabolites"},
|
||||
{"attribute": "applications", "label": "Applications", },
|
||||
{"attribute": "remarks", "label": "Remarks"},
|
||||
{"attribute": LITERATURE_LINKED_TO_SEQ_GENOME,
|
||||
"label": "Literature linked to the sequence/genome"},
|
||||
]
|
||||
|
||||
ALLOWED_SUBTAXA = ["subspecies", "variety", "convarietas", "group", "forma",
|
||||
'forma.specialis']
|
||||
ALLOWED_TAXONOMIC_RANKS = ["family", "genus", "species"] + ALLOWED_SUBTAXA
|
||||
|
||||
# nagoya
|
||||
NAGOYA_NO_RESTRICTIONS = "no_known_restrictions_under_the_Nagoya_protocol"
|
||||
NAGOYA_DOCS_AVAILABLE = "documents_providing_proof_of_legal_access_and_terms_of_use_available_at_the_collection"
|
||||
NAGOYA_PROBABLY_SCOPE = "strain_probably_in_scope,_please_contact_the_culture_collection"
|
||||
|
||||
ALLOWED_NAGOYA_OPTIONS = [NAGOYA_NO_RESTRICTIONS,
|
||||
NAGOYA_DOCS_AVAILABLE, NAGOYA_PROBABLY_SCOPE]
|
||||
|
||||
# Use restriction
|
||||
NO_RESTRICTION = "no_restriction"
|
||||
ONLY_RESEARCH = "only_research"
|
||||
COMMERCIAL_USE_WITH_AGREEMENT = "commercial_use_with_agreement"
|
||||
|
||||
ALLOWED_RESTRICTION_USE_OPTIONS = [
|
||||
NO_RESTRICTION,
|
||||
ONLY_RESEARCH,
|
||||
COMMERCIAL_USE_WITH_AGREEMENT,
|
||||
]
|
||||
|
||||
ALLOWED_RISK_GROUPS = ["1", "2", "3", "4"]
|
||||
|
||||
AGAR = "Agar"
|
||||
CRYO = "Cryo"
|
||||
DRY_ICE = "Dry Ice"
|
||||
LIQUID_CULTURE_MEDIUM = "Liquid Culture Medium"
|
||||
LYO = "Lyo"
|
||||
OIL = "Oil"
|
||||
WATER = "Water"
|
||||
ALLOWED_FORMS_OF_SUPPLY = [AGAR, CRYO, DRY_ICE,
|
||||
LIQUID_CULTURE_MEDIUM, LYO, OIL, WATER]
|
||||
|
||||
DEPOSIT = "deposit"
|
||||
ISOLATION = "isolation"
|
||||
COLLECT = "collect"
|
||||
GROWTH = "growth"
|
||||
GENETICS = "genetics"
|
||||
TAXONOMY = "taxonomy"
|
||||
# Markers
|
||||
MARKERS = "markers"
|
||||
MARKER_TYPE = "marker_type"
|
||||
MARKER_INSDC = "INSDC"
|
||||
MARKER_SEQ = "marker_seq"
|
||||
ALLOWED_MARKER_TYPES = [
|
||||
{"acronym": "16S rRNA", "marker": "16S rRNA"},
|
||||
{"acronym": "ACT", "marker": "Actin"},
|
||||
{"acronym": "CaM", "marker": "Calmodulin"},
|
||||
{"acronym": "EF-1α", "marker": "elongation factor 1-alpha (EF-1α)"},
|
||||
{"acronym": "ITS",
|
||||
"marker": "nuclear ribosomal Internal Transcribed Spacer (ITS)"},
|
||||
{"acronym": "LSU", "marker": "nuclear ribosomal Large SubUnit (LSU)"},
|
||||
{"acronym": "RPB1", "marker": "Ribosomal RNA-coding genes RPB1"},
|
||||
{"acronym": "RPB2", "marker": "Ribosomal RNA-coding genes RPB2"},
|
||||
{"acronym": "TUBB", "marker": "β-Tubulin"},
|
||||
]
|
||||
|
||||
# Publication attribute names.
PUBLICATIONS = "publications"
PUB_ID = "id"
PUB_DOI = "pub_doi"
# NOTE(review): empty attribute name; looks like an unfinished placeholder
# for a PubMed identifier. Confirm the intended value before relying on it.
PUB_PUBMED_ID = ''
PUB_FULL_REFERENCE = "full_reference"
PUB_TITLE = "title"
PUB_AUTHORS = "authors"
PUB_JOURNAL = "journal"
PUB_YEAR = "year"
PUB_VOLUME = "volume"
PUB_ISSUE = "issue"
PUB_FIRST_PAGE = "first_page"
PUB_LAST_PAGE = "last_page"
BOOK_TITLE = "book_title"
BOOK_EDITOR = "book_editor"
BOOK_PUBLISHER = "book_publisher"

# Ordered description of the publication columns: "label" is the column
# header and "attribute" the publication attribute that fills it.
PUBLICATION_FIELDS = [
    {"label": "ID", "attribute": PUB_ID},
    {"label": "Full reference", "attribute": PUB_FULL_REFERENCE},
    {"label": "Authors", "attribute": PUB_AUTHORS},
    {"label": "Title", "attribute": PUB_TITLE},
    {"label": "Journal", "attribute": PUB_JOURNAL},
    {"label": "Year", "attribute": PUB_YEAR},
    {"label": "Volume", "attribute": PUB_VOLUME},
    {"label": "Issue", "attribute": PUB_ISSUE},
    {"label": "First page", "attribute": PUB_FIRST_PAGE},
    # Was mapped to PUB_FIRST_PAGE, which duplicated the first-page value.
    {"label": "Last page", "attribute": PUB_LAST_PAGE},
    {"label": "Book title", "attribute": BOOK_TITLE},
    {"label": "Editors", "attribute": BOOK_EDITOR},
    {"label": "Publisher", "attribute": BOOK_PUBLISHER},
]
|
||||
|
||||
|
||||
# ploidy
|
||||
ANEUPLOID = 0
|
||||
HAPLOID = 1
|
||||
DIPLOID = 2
|
||||
TRIPLOID = 3
|
||||
TETRAPLOID = 4
|
||||
POLYPLOID = 9
|
||||
|
||||
ALLOWED_PLOIDIES = [ANEUPLOID, HAPLOID, DIPLOID, TRIPLOID, TETRAPLOID,
|
||||
POLYPLOID]
|
||||
|
||||
SUBTAXAS = {
|
||||
"subsp.": "subspecies",
|
||||
"var.": "variety",
|
||||
"convar.": "convarietas",
|
||||
"group.": "group",
|
||||
"f.": "forma",
|
||||
"f.sp.": "forma.specialis"
|
||||
}
|
||||
|
||||
|
||||
#Control
|
||||
VERSION = "Version"
|
||||
DATE = "Date"
|
||||
|
||||
|
||||
# Control fields
|
||||
CONTROL_FIELDS = [
|
||||
{"label": "Version", "attribute": VERSION},
|
||||
{"label": "Date", "attribute": DATE},
|
||||
]
|
||||
|
||||
# Excel sheet name
|
||||
LOCATIONS = "Geographic origin" # 'Locations'
|
||||
GROWTH_MEDIA = "Growth media"
|
||||
GENOMIC_INFO = "Genomic information"
|
||||
STRAINS = "Strains"
|
||||
LITERATURE_SHEET = "Literature"
|
||||
SEXUAL_STATE_SHEET = "Sexual state"
|
||||
RESOURCE_TYPES_VALUES = "Resource types values"
|
||||
FORM_OF_SUPPLY_SHEET = "Forms of supply"
|
||||
PLOIDY_SHEET = "Ploidy"
|
||||
ONTOBIOTOPE = "Ontobiotope"
|
||||
MARKERS = "Markers"
|
||||
CONTROL_SHEET = "Version"
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -16,20 +16,19 @@ from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROS
|
||||
TYPE, UNIQUE, VALIDATION, VALUES, BIBLIO, DOMINIO,URL_DOMINIO, ISO, URL_TITLE,JUST_URL,TITLE,
|
||||
HISTORY,NAGOYA1, VERSION)
|
||||
from mirri.settings import LOCATIONS, SUBTAXAS
|
||||
from mirri.settings_v1 import LOCATIONS, SUBTAXAS
|
||||
from mirri.validation.validation_conf_12052023 import version_config
|
||||
|
||||
from mirri.validation.validation_conf_12052023 import MIRRI_12052023_VALLIDATION_CONF
|
||||
|
||||
def validate_mirri_excel(fhand, version="", date=""):
    """Validate a MIRRI workbook against the configuration for ``version``.

    Args:
        fhand: File-like object holding the workbook; forwarded to
            ``validate_excel``.
        version: Key looked up in ``version_config``.
        date: Specification date. When empty, the ``"date"`` entry of the
            selected configuration is used instead.

    Returns:
        The result of ``validate_excel`` for the selected configuration.

    Raises:
        NotImplementedError: If ``version`` has no entry in
            ``version_config``.
        ValueError: If the effective date is not ``"12/05/2023"``.
    """
    base_configuration = version_config.get(version)
    if base_configuration is None:
        raise NotImplementedError(f"Unsupported version: {version}")

    effective_date = date or base_configuration.get("date")
    if effective_date != "12/05/2023":
        raise ValueError("Invalid date. Expected: 12/05/2023")

    # Work on a shallow copy: writing the date into the shared
    # version_config entry would leak one caller's value into later calls.
    configuration = dict(base_configuration, date=effective_date)
    return validate_excel(fhand, configuration)
|
||||
|
||||
|
||||
def validate_mirri_excel(fhand, version= "5.1.2" ):
    """Validate a MIRRI workbook; only specification 5.1.2 is supported.

    Args:
        fhand: File-like object holding the workbook; forwarded to
            ``validate_excel``.
        version: Specification version, ``"5.1.2"`` by default.

    Returns:
        The result of ``validate_excel`` run with
        ``MIRRI_12052023_VALLIDATION_CONF``.

    Raises:
        NotImplementedError: If ``version`` is anything other than
            ``"5.1.2"``.
    """
    # Guard clause: reject unsupported versions before touching the file.
    if version != "5.1.2":
        raise NotImplementedError("Only version 5.1.2 is implemented")
    return validate_excel(fhand, MIRRI_12052023_VALLIDATION_CONF)
|
||||
|
||||
def version(value , validation_conf=None):
|
||||
if value is None:
|
||||
return True
|
||||
@ -210,8 +209,6 @@ def validate_row(row, validation_steps, in_memory_sheets):
|
||||
kind = validation_step[TYPE]
|
||||
error_code = validation_step[ERROR_CODE]
|
||||
if kind == NAGOYA:
|
||||
if not is_valid_nagoya_v20200601(row, in_memory_sheets):
|
||||
return error_code
|
||||
if not is_valid_nagoya_v12052023(row, in_memory_sheets):
|
||||
return error_code
|
||||
elif kind == BIBLIO:
|
||||
@ -281,39 +278,10 @@ def is_valid_nago(row):
|
||||
return True
|
||||
|
||||
def parsee_mirri_excel(row, in_memory_sheets, version=""):
    """Run the Nagoya row check that matches ``version``.

    Args:
        row: Strain row; forwarded to the version-specific check.
        in_memory_sheets: Auxiliary sheets; forwarded unchanged.
        version: ``"20200601"`` selects ``is_valid_nagoya_v20200601`` and
            ``"12052023"`` selects ``is_valid_nagoya_v12052023``.

    Returns:
        The result of the selected check.

    Raises:
        NotImplementedError: If ``version`` is neither of the values above.
    """
    if version == "20200601":
        return is_valid_nagoya_v20200601(row, in_memory_sheets)
    if version == "12052023":
        return is_valid_nagoya_v12052023(row, in_memory_sheets)
    raise NotImplementedError("Only versions 20200601 and 12052023 are implemented")
|
||||
|
||||
def is_valid_nagoya_v20200601(row, in_memory_sheets):  # sourcery skip: return-identity
    """Check the Nagoya consistency rule for one row of the 20200601 layout.

    The row is rejected only when its reference year is 2014 or later and no
    country can be resolved for it.

    Args:
        row: Mapping of column label to cell value for one strain.
        in_memory_sheets: Mapping of sheet name to ``{id: row}``; the
            ``LOCATIONS`` sheet is consulted only when the row has a
            'Geographic origin' value.

    Returns:
        bool: ``False`` when the rule is violated, ``True`` otherwise
        (including when no usable year is available).
    """
    location_index = row.get('Geographic origin', None)
    if location_index is None:
        country = None
    else:
        geo_origin = in_memory_sheets[LOCATIONS].get(location_index, {})
        country = geo_origin.get('Country', None)

    # The first date that is present wins, in this order of preference.
    _date = None
    for date_field in ("Date of collection", "Date of isolation",
                       "Date of deposit",
                       "Date of inclusion in the catalogue"):
        _date = row.get(date_field, None)
        if _date is not None:
            break

    if _date is None:
        year = None
    elif isinstance(_date, datetime):
        year = _date.year
    else:
        try:
            year = int(str(_date)[:4])
        except ValueError:
            # A malformed date cell cannot be judged here; treating the year
            # as unknown keeps this check from aborting the whole validation.
            year = None

    return not (year is not None and year >= 2014 and country is None)
|
||||
|
||||
def is_valid_nagoya_v12052023(row, in_memory_sheets): # sourcery skip: return-identity
|
||||
location_index = row.get('geographicOrigin', None)
|
||||
|
||||
@ -9,10 +9,9 @@ from mirri.validation.excel_validator import validate_mirri_excel
|
||||
def main():
|
||||
path = Path(sys.argv[1])
|
||||
version = str(sys.argv[2])
|
||||
date = str(sys.argv[3])
|
||||
try:
|
||||
|
||||
error_log = validate_mirri_excel(path.open("rb"), version=version, date=date)
|
||||
error_log = validate_mirri_excel(path.open("rb"), version=version)
|
||||
|
||||
except NotImplementedError as e:
|
||||
print(e)
|
||||
|
||||
@ -4,7 +4,7 @@ from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROS
|
||||
UNIQUE,VERSION,
|
||||
VALIDATION, VALUES, BIBLIO, DOMINIO, URL_DOMINIO,ISO, JUST_URL, URL_TITLE, TITLE, HISTORY,NAGOYA1)
|
||||
from mirri.settings import (ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
|
||||
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET, MARKERS, CONTROL_SHEET,)
|
||||
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET, MARKERS, CONTROL_SHEET)
|
||||
|
||||
|
||||
|
||||
@ -323,7 +323,7 @@ STRAIN_FIELDS = [
|
||||
{
|
||||
FIELD: "plasmidCollections",
|
||||
VALIDATION: [
|
||||
{TYPE: REGEXP, MATCH: "([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\)(;([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\))*$",
|
||||
{TYPE: REGEXP, MATCH: "([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\)(\s*;([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\))*$",
|
||||
ERROR_CODE: "STD62"}
|
||||
]
|
||||
},
|
||||
@ -358,7 +358,7 @@ STRAIN_FIELDS = [
|
||||
{
|
||||
FIELD: "sequenceLiterature",
|
||||
VALIDATION: [
|
||||
{TYPE: REGEXP, MATCH: "^\d+(;?\s*\d+)*$", ERROR_CODE: "STD61"},
|
||||
{TYPE: REGEXP, MATCH: "^\d+(\s*;?\s*\d+)*$", ERROR_CODE: "STD61"},
|
||||
]
|
||||
|
||||
},
|
||||
|
||||
@ -1,545 +0,0 @@
|
||||
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
|
||||
ERROR_CODE, FIELD, MANDATORY, MATCH,
|
||||
MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
|
||||
UNIQUE,
|
||||
VALIDATION, VALUES, BIBLIO)
|
||||
from mirri.settings_v1 import (ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
|
||||
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET, MARKERS)
|
||||
# GEOGRAPHIC_ORIGIN
|
||||
# SEXUAL_STATE_SHEET,
|
||||
# RESOURCE_TYPES_VALUES,
|
||||
# FORM_OF_SUPPLY_SHEET,
|
||||
# PLOIDY_SHEET)
|
||||
|
||||
|
||||
STRAIN_FIELDS = [
|
||||
{
|
||||
FIELD: "Accession number",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: 'STD01'},
|
||||
{TYPE: UNIQUE, ERROR_CODE: 'STD03'},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD02"},
|
||||
{TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD04"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Restrictions on use",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD05"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD06"},
|
||||
{TYPE: CHOICES, VALUES: ["1", "2", "3"],
|
||||
MULTIPLE: False, ERROR_CODE: "STD07"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Nagoya protocol restrictions and compliance conditions",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD08"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD09"},
|
||||
{TYPE: CHOICES, VALUES: ["1", "2", "3"],
|
||||
MULTIPLE: False, ERROR_CODE: "STD10"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "ABS related files",
|
||||
VALIDATION: [],
|
||||
},
|
||||
{
|
||||
FIELD: "MTA file",
|
||||
VALIDATION: [],
|
||||
},
|
||||
{
|
||||
FIELD: "Other culture collection numbers",
|
||||
# VALIDATION: [
|
||||
# {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD07",
|
||||
# MULTIPLE: True, SEPARATOR: ";"}
|
||||
# ]
|
||||
},
|
||||
{
|
||||
FIELD: "Strain from a registered collection",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD11"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Risk Group",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD12"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD13"},
|
||||
{TYPE: CHOICES, VALUES: ["1", "2", "3", "4"],
|
||||
MULTIPLE: False, ERROR_CODE: "STD14"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Dual use",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD15"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Quarantine in Europe",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD16"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Organism type",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD17"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD18"},
|
||||
{TYPE: CHOICES, VALUES: ["Algae", "Archaea", "Bacteria",
|
||||
"Cyanobacteria", "Filamentous Fungi",
|
||||
"Phage", "Plasmid", "Virus", "Yeast",
|
||||
"1", "2", "3", "4", "5", "6", "7", "8", "9"],
|
||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD19"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Taxon name",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD20"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD21"},
|
||||
{TYPE: TAXON, ERROR_CODE: "STD22", MULTIPLE: True,
|
||||
SEPARATOR: ';'}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Infrasubspecific names",
|
||||
},
|
||||
{
|
||||
FIELD: "Comment on taxonomy",
|
||||
},
|
||||
{
|
||||
FIELD: "Interspecific hybrid",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD23"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Status",
|
||||
},
|
||||
{
|
||||
FIELD: "History of deposit",
|
||||
VALIDATION: [
|
||||
# {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD24", # modify the regex
|
||||
# MULTIPLE: True, SEPARATOR: ";"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Depositor"
|
||||
},
|
||||
{
|
||||
FIELD: "Date of deposit",
|
||||
VALIDATION: [
|
||||
{TYPE: DATE, ERROR_CODE: "STD25"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Date of inclusion in the catalogue",
|
||||
VALIDATION: [
|
||||
{TYPE: DATE, ERROR_CODE: "STD26"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Collected by",
|
||||
},
|
||||
{
|
||||
FIELD: "Date of collection",
|
||||
VALIDATION: [
|
||||
{TYPE: DATE, ERROR_CODE: "STD27"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Isolated by",
|
||||
},
|
||||
{
|
||||
FIELD: "Date of isolation",
|
||||
VALIDATION: [
|
||||
{TYPE: DATE, ERROR_CODE: "STD28"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Substrate/host of isolation",
|
||||
},
|
||||
{
|
||||
FIELD: "Tested temperature growth range",
|
||||
VALIDATION: [
|
||||
{TYPE: REGEXP, "match": r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
|
||||
ERROR_CODE: "STD29", MULTIPLE: True, SEPARATOR: ";"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Recommended growth temperature",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD30"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD31"},
|
||||
{TYPE: REGEXP, "match": r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
|
||||
ERROR_CODE: "STD32",
|
||||
MULTIPLE: True, SEPARATOR: ";"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Recommended medium for growth",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD33"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD34"},
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
|
||||
MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Form of supply",
|
||||
VALIDATION: [
|
||||
{TYPE: MANDATORY, ERROR_CODE: "STD36"},
|
||||
{TYPE: MISSING, ERROR_CODE: "STD37"},
|
||||
{TYPE: CHOICES, VALUES: ['Agar', 'Cryo', 'Dry Ice', 'Liquid Culture Medium',
|
||||
'Lyo', 'Oil', 'Water'],
|
||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD38"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Other denomination",
|
||||
},
|
||||
{
|
||||
FIELD: "Coordinates of geographic origin",
|
||||
VALIDATION: [
|
||||
{TYPE: COORDINATES, ERROR_CODE: "STD39"},
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Altitude of geographic origin",
|
||||
VALIDATION: [
|
||||
{TYPE: NUMBER, 'max': 8000, 'min': -200, ERROR_CODE: "STD40"},
|
||||
]
|
||||
},
|
||||
{
|
||||
# value can be in the cell or in another sheet. Don't configure this
|
||||
FIELD: "Geographic origin",
|
||||
},
|
||||
{
|
||||
FIELD: "Isolation habitat",
|
||||
},
|
||||
{
|
||||
FIELD: "Ontobiotope term for the isolation habitat",
|
||||
VALIDATION: [
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: "Ontobiotope",
|
||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD41"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "GMO",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||
ERROR_CODE: "STD42"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "GMO construction information",
|
||||
},
|
||||
{
|
||||
FIELD: "Mutant information",
|
||||
},
|
||||
{
|
||||
FIELD: "Genotype",
|
||||
},
|
||||
{
|
||||
FIELD: "Sexual state",
|
||||
VALIDATION: [
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: SEXUAL_STATE_SHEET,
|
||||
ERROR_CODE: "STD43"}
|
||||
# {TYPE: CHOICES, VALUES: ["Mata", "Matalpha", "Mata/Matalpha",
|
||||
# "Matb", "Mata/Matb", "MTLa", "MTLalpha", "MTLa/MTLalpha",
|
||||
# "MAT1-1", "MAT1-2", "MAT1", "MAT2", "MT+", "MT-"],
|
||||
# ERROR_CODE: "STD43"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Ploidy",
|
||||
VALIDATION: [
|
||||
{TYPE: CHOICES, VALUES: ["0", "1", "2", "3", "4", "9"],
|
||||
ERROR_CODE: "STD44"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Plasmids",
|
||||
},
|
||||
{
|
||||
FIELD: "Plasmids collections fields",
|
||||
},
|
||||
{
|
||||
# value can be in the cell or in another sheet. Don't configure this
|
||||
FIELD: "Literature",
|
||||
VALIDATION: [
|
||||
{TYPE: CROSSREF, CROSSREF_NAME: LITERATURE_SHEET,
|
||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD45"}
|
||||
]
|
||||
},
|
||||
{
|
||||
FIELD: "Plant pathogenicity code",
|
||||
},
|
||||
{
|
||||
FIELD: "Pathogenicity",
|
||||
},
|
||||
{
|
||||
FIELD: "Enzyme production",
|
||||
},
|
||||
{
|
||||
FIELD: "Production of metabolites",
|
||||
},
|
||||
{
|
||||
FIELD: "Applications",
|
||||
},
|
||||
{
|
||||
FIELD: "Remarks"
|
||||
},
|
||||
{
|
||||
FIELD: "Literature linked to the sequence/genome",
|
||||
},
|
||||
]
|
||||
# Per-sheet validation schema, keyed by the sheet-name constant.
#
# Every sheet entry is a dict with these keys (in this order):
#   "acronym"       short sheet code; it is also the prefix of the ERROR_CODE
#                   values used by that sheet's column rules
#                   (e.g. "GOD" -> "GOD01").
#   "id_field"      name of one of the sheet's columns.
#                   NOTE(review): presumably the column that identifies a
#                   row -- confirm against the validator.
#   VALIDATION      a single sheet-level rule (absent for MARKERS).
#   ROW_VALIDATION  optional list of rules applied to a whole row.
#   COLUMNS         ordered list of column definitions; each one has a FIELD
#                   (the column header) and a VALIDATION list of rules.
#
# A rule is a dict with a TYPE, an ERROR_CODE and, for CROSSREF rules, the
# CROSSREF_NAME of the sheet being referenced.
# NOTE(review): MANDATORY looks like "must be present" and MISSING like
# "must not be empty" -- confirm against the validator implementation.
#
# Gaps in the error-code numbering (e.g. there is no "GOD05" or "LID04") are
# reproduced exactly as found; do not renumber.
SHEETS_SCHEMA = {
    LOCATIONS: {
        "acronym": "GOD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS02"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD01"},
                    {TYPE: MISSING, ERROR_CODE: "GOD02"},
                ],
            },
            {
                FIELD: "Country",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD03"},
                    {TYPE: MISSING, ERROR_CODE: "GOD04"},
                ],
            },
            {FIELD: "Region", VALIDATION: []},
            {FIELD: "City", VALIDATION: []},
            {
                FIELD: "Locality",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD06"},
                    {TYPE: MISSING, ERROR_CODE: "GOD07"},
                ],
            },
        ],
    },
    GROWTH_MEDIA: {
        "acronym": "GMD",
        "id_field": "Acronym",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS01"},
        COLUMNS: [
            {
                FIELD: "Acronym",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD01"},
                    {TYPE: MISSING, ERROR_CODE: "GMD02"},
                ],
            },
            {
                FIELD: "Description",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD03"},
                    {TYPE: MISSING, ERROR_CODE: "GMD04"},
                ],
            },
            {FIELD: "Full description", VALIDATION: []},
        ],
    },
    GENOMIC_INFO: {
        "acronym": "GID",
        "id_field": "Strain AN",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS08"},
        COLUMNS: [
            {
                FIELD: "Strain AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID01"},
                    {TYPE: MISSING, ERROR_CODE: "GID02"},
                    # The referenced sheet is named by a string literal here,
                    # not by the STRAINS constant; kept exactly as found.
                    {TYPE: CROSSREF, CROSSREF_NAME: "Strains",
                     ERROR_CODE: "GID03"},
                ],
            },
            {
                FIELD: "Marker",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID04"},
                    {TYPE: MISSING, ERROR_CODE: "GID05"},
                    {TYPE: CROSSREF, CROSSREF_NAME: MARKERS,
                     ERROR_CODE: "GID06"},
                ],
            },
            {
                FIELD: "INSDC AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID07"},
                    {TYPE: MISSING, ERROR_CODE: "GID08"},
                ],
            },
            {FIELD: "Sequence", VALIDATION: []},
        ],
    },
    STRAINS: {
        "acronym": "STD",
        "id_field": "Accession number",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
        ROW_VALIDATION: [
            {TYPE: NAGOYA, ERROR_CODE: "STD46"},
        ],
        # The strain columns are long enough to live in their own list.
        COLUMNS: STRAIN_FIELDS,
    },
    LITERATURE_SHEET: {
        "acronym": "LID",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
        ROW_VALIDATION: [
            {TYPE: BIBLIO, ERROR_CODE: "LID17"},
        ],
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID01"},
                    {TYPE: MISSING, ERROR_CODE: "LID02"},
                ],
            },
            {
                FIELD: "Full reference",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID03"},
                ],
            },
            {
                FIELD: "Authors",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID05"},
                ],
            },
            {
                FIELD: "Title",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID07"},
                ],
            },
            {
                FIELD: "Journal",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID09"},
                ],
            },
            {
                FIELD: "Year",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID11"},
                ],
            },
            {
                FIELD: "Volume",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID13"},
                ],
            },
            {FIELD: "Issue", VALIDATION: []},
            {
                FIELD: "First page",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID15"},
                    {TYPE: MISSING, ERROR_CODE: "LID16"},
                ],
            },
            {FIELD: "Last page", VALIDATION: []},
            {FIELD: "Book title", VALIDATION: []},
            {FIELD: "Editors", VALIDATION: []},
            {FIELD: "Publisher", VALIDATION: []},
        ],
    },
    # The following sheets are deliberately left without a schema entry; the
    # disabled definitions are kept for reference.
    # SEXUAL_STATE_SHEET: {"acronym": "SSD", COLUMNS: []},
    # RESOURCE_TYPES_VALUES: {"acronym": "RTD", COLUMNS: []},
    # FORM_OF_SUPPLY_SHEET: {"acronym": "FSD", COLUMNS: []},
    # PLOIDY_SHEET: {"acronym": "PLD", COLUMNS: []},
    ONTOBIOTOPE: {
        "acronym": "OTD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS06"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD01"},
                    {TYPE: MISSING, ERROR_CODE: "OTD02"},
                ],
            },
            {
                FIELD: "Name",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD03"},
                    {TYPE: MISSING, ERROR_CODE: "OTD04"},
                ],
            },
        ],
    },
    # MARKERS carries no sheet-level VALIDATION rule and no column rules.
    MARKERS: {
        "acronym": "MKD",
        "id_field": "Acronym",
        COLUMNS: [
            {FIELD: "Acronym", VALIDATION: []},
            {FIELD: "Marker", VALIDATION: []},
        ],
    },
}
|
||||
|
||||
# Maps a sheet-name constant to a list of column names of that sheet.
# NOTE(review): presumably these are the columns whose values CROSSREF rules
# are checked against -- confirm against the validator.
# SEXUAL_STATE_SHEET is registered with an empty column list, exactly as in
# the original configuration.
CROSS_REF_CONF = {
    ONTOBIOTOPE: ["ID", "Name"],
    LITERATURE_SHEET: ["ID"],
    LOCATIONS: ["Locality"],
    GROWTH_MEDIA: ["Acronym"],
    STRAINS: ["Accession number"],
    SEXUAL_STATE_SHEET: [],
    MARKERS: ["Acronym"],
}
|
||||
|
||||
# Top-level validation configuration bundling the per-sheet schema and the
# cross-reference table defined in this module.
#
# NOTE(review): "VALLIDATION" is misspelled, but the identifier is left
# untouched because other modules may import it under this exact name.
MIRRI_20200601_VALLIDATION_CONF = {
    "sheet_schema": SHEETS_SCHEMA,
    "cross_ref_conf": CROSS_REF_CONF,
    # Sheets listed here are paired with the column they are indexed by.
    # NOTE(review): presumably cached by the validator for lookups --
    # confirm against the consumer of this key.
    "keep_sheets_in_memory": [
        {"sheet_name": LOCATIONS, "indexed_by": "Locality"},
    ],
}
|
||||
Loading…
x
Reference in New Issue
Block a user