Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7338930c72 |
@ -37,14 +37,14 @@ TRUEFALSE_TRANSLATOR = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def parse_mirri_excel(fhand, version=""):
    """Parse a MIRRI excel workbook with the parser for the given spec version.

    Args:
        fhand: File handle of the workbook; it is passed unchanged to the
            version-specific parser.
        version: Specification version of the workbook. Only "5.1.2" is
            handled. The default ("") matches no parser, so callers have to
            pass the version explicitly.

    Returns:
        The result of ``_parse_mirri_v12052023(fhand)``.

    Raises:
        NotImplementedError: If ``version`` is anything other than "5.1.2".
    """
    if version == "5.1.2":
        return _parse_mirri_v12052023(fhand)
    # The message previously read "Only version is 5.1.2 implemented".
    raise NotImplementedError("Only version 5.1.2 is implemented")
|
||||||
|
|
||||||
|
|
||||||
def _parse_mirri_v20200601(fhand):
|
def _parse_mirri_v12052023(fhand):
|
||||||
fhand.seek(0)
|
fhand.seek(0)
|
||||||
file_content = BytesIO(fhand.read())
|
file_content = BytesIO(fhand.read())
|
||||||
wb = load_workbook(filename=file_content, read_only=True, data_only=True)
|
wb = load_workbook(filename=file_content, read_only=True, data_only=True)
|
||||||
@ -64,7 +64,6 @@ def _parse_mirri_v20200601(fhand):
|
|||||||
|
|
||||||
return {"strains": strains, "growth_media": growth_media}
|
return {"strains": strains, "growth_media": growth_media}
|
||||||
|
|
||||||
|
|
||||||
def index_list_by(list_, id_):
|
def index_list_by(list_, id_):
|
||||||
return {str(item[id_]): item for item in list_}
|
return {str(item[id_]): item for item in list_}
|
||||||
|
|
||||||
@ -125,7 +124,7 @@ def parse_strains(wb, locations, growth_media, markers, publications,
|
|||||||
publications = index_list_by_attr(publications, 'id')
|
publications = index_list_by_attr(publications, 'id')
|
||||||
markers = index_markers(markers)
|
markers = index_markers(markers)
|
||||||
|
|
||||||
for strain_row in workbook_sheet_reader(wb, STRAINS, "Accession number"):
|
for strain_row in workbook_sheet_reader(wb, STRAINS, "accessionNumber"):
|
||||||
strain = StrainMirri()
|
strain = StrainMirri()
|
||||||
strain_id = None
|
strain_id = None
|
||||||
label = None
|
label = None
|
||||||
@ -140,7 +139,7 @@ def parse_strains(wb, locations, growth_media, markers, publications,
|
|||||||
collection, number = value.split(" ", 1)
|
collection, number = value.split(" ", 1)
|
||||||
value = StrainId(collection=collection, number=number)
|
value = StrainId(collection=collection, number=number)
|
||||||
rsetattr(strain, attribute, value)
|
rsetattr(strain, attribute, value)
|
||||||
|
|
||||||
elif attribute == "restriction_on_use":
|
elif attribute == "restriction_on_use":
|
||||||
rsetattr(strain, attribute, RESTRICTION_USE_TRANSLATOR[value])
|
rsetattr(strain, attribute, RESTRICTION_USE_TRANSLATOR[value])
|
||||||
elif attribute == "nagoya_protocol":
|
elif attribute == "nagoya_protocol":
|
||||||
@ -202,9 +201,19 @@ def parse_strains(wb, locations, growth_media, markers, publications,
|
|||||||
items = value.split(";")
|
items = value.split(";")
|
||||||
strain.collect.location.latitude = float(items[0])
|
strain.collect.location.latitude = float(items[0])
|
||||||
strain.collect.location.longitude = float(items[1])
|
strain.collect.location.longitude = float(items[1])
|
||||||
|
strain.collect.location.precision = float(items[2])
|
||||||
|
strain.collect.location.altitude = float(items[3])
|
||||||
|
if len(items) > 4:
|
||||||
|
strain.collect.location.coord_uncertainty = items[4]
|
||||||
|
|
||||||
|
elif attribute == "collect.site.links":
|
||||||
|
items = value.split(";")
|
||||||
|
strain.collect.site.links.nameSite = str(items[0])
|
||||||
|
strain.collect.site.links.urlSite = str(items[1])
|
||||||
|
rsetattr(strain, attribute, value.split(";")) #ver o separador
|
||||||
if len(items) > 2:
|
if len(items) > 2:
|
||||||
strain.collect.location.coord_uncertainty = items[2]
|
strain.collect.site.links.site_uncertainty = items[2]
|
||||||
|
|
||||||
elif attribute == "collect.location":
|
elif attribute == "collect.location":
|
||||||
location = locations[value]
|
location = locations[value]
|
||||||
if 'Country' in location and location['Country']:
|
if 'Country' in location and location['Country']:
|
||||||
|
|||||||
@ -50,11 +50,10 @@ PUB_HEADERS = [pb["label"] for pb in PUBLICATION_FIELDS]
|
|||||||
|
|
||||||
|
|
||||||
def write_mirri_excel(path, strains, growth_media, version):
    """Write strains and growth media to a MIRRI excel file for a spec version.

    Args:
        path: Destination of the workbook; forwarded to the version-specific
            writer.
        strains: Strains to serialize; forwarded unchanged.
        growth_media: Growth media to serialize; forwarded unchanged.
        version: Specification version to write. Only "5.1.2" is handled.

    Raises:
        NotImplementedError: If ``version`` is anything other than "5.1.2".
    """
    if version != "5.1.2":
        # An unsupported version used to fall through and return without
        # writing any file; fail loudly instead, as parse_mirri_excel does.
        raise NotImplementedError("Only version 5.1.2 is implemented")
    _write_mirri_excel_12052023(path, strains, growth_media)
|
||||||
|
|
||||||
|
def _write_mirri_excel_12052023(path, strains, growth_media):
|
||||||
def _write_mirri_excel_20200601(path, strains, growth_media):
|
|
||||||
wb = Workbook()
|
wb = Workbook()
|
||||||
|
|
||||||
write_markers_sheet(wb)
|
write_markers_sheet(wb)
|
||||||
@ -104,7 +103,7 @@ def _write_mirri_excel_20200601(path, strains, growth_media):
|
|||||||
redimension_cell_width(pub_sheet)
|
redimension_cell_width(pub_sheet)
|
||||||
|
|
||||||
# write sexual states
|
# write sexual states
|
||||||
sex_sheet = wb.create_sheet("Sexual states")
|
sex_sheet = wb.create_sheet("Sexual state")
|
||||||
for sex_state in sorted(list(sexual_states)):
|
for sex_state in sorted(list(sexual_states)):
|
||||||
sex_sheet.append([sex_state])
|
sex_sheet.append([sex_state])
|
||||||
redimension_cell_width(sex_sheet)
|
redimension_cell_width(sex_sheet)
|
||||||
@ -121,7 +120,6 @@ def _write_mirri_excel_20200601(path, strains, growth_media):
|
|||||||
del wb["Sheet"]
|
del wb["Sheet"]
|
||||||
wb.save(str(path))
|
wb.save(str(path))
|
||||||
|
|
||||||
|
|
||||||
def _deserialize_strains(strains, locations, growth_media_indexes,
|
def _deserialize_strains(strains, locations, growth_media_indexes,
|
||||||
publications, sexual_states, genomic_markers):
|
publications, sexual_states, genomic_markers):
|
||||||
for strain in strains:
|
for strain in strains:
|
||||||
@ -189,10 +187,21 @@ def _deserialize_strains(strains, locations, growth_media_indexes,
|
|||||||
elif attribute == "collect.location.coords":
|
elif attribute == "collect.location.coords":
|
||||||
lat = strain.collect.location.latitude
|
lat = strain.collect.location.latitude
|
||||||
long = strain.collect.location.longitude
|
long = strain.collect.location.longitude
|
||||||
if lat is not None and long is not None:
|
alt = strain.collect.location.altitude
|
||||||
value = f"{lat};{long}"
|
prec = strain.collect.location.precision
|
||||||
|
if lat is not None and long is not None and prec is not None and alt is not None:
|
||||||
|
value = f"{lat};{long};{prec};{alt}"
|
||||||
else:
|
else:
|
||||||
value = None
|
value = None
|
||||||
|
elif attribute == "collect.site.links":
|
||||||
|
name = strain.collect.site.links.nameSite
|
||||||
|
url = strain.collect.site.links.urlSite
|
||||||
|
value = rgetattr(strain, attribute)
|
||||||
|
value = ";".join(value)
|
||||||
|
if name is not None and url is not None:
|
||||||
|
value = f"{name};{url}"
|
||||||
|
else:
|
||||||
|
value = None
|
||||||
|
|
||||||
elif attribute == "collect.location":
|
elif attribute == "collect.location":
|
||||||
location = strain.collect.location
|
location = strain.collect.location
|
||||||
|
|||||||
@ -3,6 +3,7 @@ from pathlib import Path
|
|||||||
DATA_DIR = Path(__file__).parent / "data"
|
DATA_DIR = Path(__file__).parent / "data"
|
||||||
|
|
||||||
ACCESSION_NUMBER = "accession_number"
|
ACCESSION_NUMBER = "accession_number"
|
||||||
|
MIRRI_ACCESSION_NUMBER = 'mirri_accession_number'
|
||||||
RESTRICTION_ON_USE = "restriction_on_use"
|
RESTRICTION_ON_USE = "restriction_on_use"
|
||||||
NAGOYA_PROTOCOL = "nagoya_protocol"
|
NAGOYA_PROTOCOL = "nagoya_protocol"
|
||||||
ABS_RELATED_FILES = "abs_related_files"
|
ABS_RELATED_FILES = "abs_related_files"
|
||||||
@ -14,6 +15,7 @@ DUAL_USE = "dual_use"
|
|||||||
QUARANTINE = "quarantine"
|
QUARANTINE = "quarantine"
|
||||||
ORGANISM_TYPE = "organism_type"
|
ORGANISM_TYPE = "organism_type"
|
||||||
TAXON_NAME = "taxon_name"
|
TAXON_NAME = "taxon_name"
|
||||||
|
TYPE = "type"
|
||||||
INFRASUBSPECIFIC_NAME = "infrasubspecific_names"
|
INFRASUBSPECIFIC_NAME = "infrasubspecific_names"
|
||||||
COMMENTS_ON_TAXONOMY = "comments_on_taxonomy"
|
COMMENTS_ON_TAXONOMY = "comments_on_taxonomy"
|
||||||
STATUS = "status"
|
STATUS = "status"
|
||||||
@ -54,6 +56,9 @@ SUBSTRATE_HOST_OF_ISOLATION = "substrate_host_of_isolation"
|
|||||||
ISOLATION_HABITAT = "isolation_habitat"
|
ISOLATION_HABITAT = "isolation_habitat"
|
||||||
ONTOBIOTOPE_ISOLATION_HABITAT = "ontobiotope_term_for_the_isolation_habitat"
|
ONTOBIOTOPE_ISOLATION_HABITAT = "ontobiotope_term_for_the_isolation_habitat"
|
||||||
LITERATURE_LINKED_TO_SEQ_GENOME = "literature_linked_to_the_sequence_genome"
|
LITERATURE_LINKED_TO_SEQ_GENOME = "literature_linked_to_the_sequence_genome"
|
||||||
|
AXENIC_CULTURE = "axenic_culture"
|
||||||
|
QPS ="qps"
|
||||||
|
SITE_LINK = "site_links"
|
||||||
|
|
||||||
# StrainId
|
# StrainId
|
||||||
STRAIN_ID = "id"
|
STRAIN_ID = "id"
|
||||||
@ -99,73 +104,80 @@ ALLOWED_COLLECTING_SITE_KEYS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
MIRRI_FIELDS = [
|
MIRRI_FIELDS = [
|
||||||
{"attribute": "id", "label": "Accession number"},
|
{"attribute": "id", "label": "accessionNumber"},
|
||||||
{"attribute": "restriction_on_use", "label": "Restrictions on use"},
|
{"attribute": "mirri_accession_number", "label": "mirriAccessionNumber"},
|
||||||
|
{"attribute": "qps", "label": "qps"},
|
||||||
|
{"attribute": "axenic_culture", "label": "axenicCulture"},
|
||||||
|
{"attribute": "restriction_on_use", "label": "useRestrictions"},
|
||||||
{"attribute": "nagoya_protocol",
|
{"attribute": "nagoya_protocol",
|
||||||
"label": "Nagoya protocol restrictions and compliance conditions"},
|
"label": "nagoyaConditions"},
|
||||||
{"attribute": ABS_RELATED_FILES, "label": "ABS related files"},
|
{"attribute": ABS_RELATED_FILES, "label": "absFile"},
|
||||||
{"attribute": "mta_files", "label": "MTA file"},
|
{"attribute": "mta_files", "label": "mtaFile"},
|
||||||
{"attribute": "other_numbers", "label": "Other culture collection numbers"},
|
{"attribute": "other_numbers", "label": "otherCollectionNumbers"},
|
||||||
{"attribute": "is_from_registered_collection",
|
{"attribute": "is_from_registered_collection",
|
||||||
"label": "Strain from a registered collection"},
|
"label": "registeredCollection"},
|
||||||
{"attribute": "risk_group", "label": "Risk Group"},
|
{"attribute": "risk_group", "label": "riskGroup"},
|
||||||
{"attribute": "is_potentially_harmful", "label": "Dual use"},
|
{"attribute": "is_potentially_harmful", "label": "dualUse"},
|
||||||
{"attribute": "is_subject_to_quarantine", "label": "Quarantine in Europe"},
|
{"attribute": "is_subject_to_quarantine", "label": "euQuarantine"},
|
||||||
{"attribute": "taxonomy.organism_type", "label": "Organism type"},
|
{"attribute": "taxonomy.organism_type", "label": "organismType"},
|
||||||
{"attribute": "taxonomy.taxon_name", "label": "Taxon name"},
|
{"attribute": "taxonomy.taxon_name", "label": "speciesName"},
|
||||||
{"attribute": "taxonomy.infrasubspecific_name",
|
{"attribute": "taxonomy.infrasubspecific_name",
|
||||||
"label": "Infrasubspecific names"},
|
"label": "infrasubspecificNames"},
|
||||||
{"attribute": "taxonomy.comments", "label": "Comment on taxonomy"},
|
{"attribute": "taxonomy.comments", "label": "taxonomyComments"},
|
||||||
{"attribute": "taxonomy.interspecific_hybrid",
|
{"attribute": "taxonomy.interspecific_hybrid",
|
||||||
"label": "Interspecific hybrid"},
|
"label": "hybrid"},
|
||||||
{"attribute": "status", "label": "Status"},
|
{"attribute": "status", "label": "status"},
|
||||||
{"attribute": "history", "label": "History of deposit", },
|
{"attribute": "history", "label": "depositHistory", },
|
||||||
{"attribute": "deposit.who", "label": "Depositor"},
|
{"attribute": "deposit.who", "label": "depositor"},
|
||||||
{"attribute": "deposit.date", "label": "Date of deposit"},
|
{"attribute": "deposit.date", "label": "depositDate"},
|
||||||
{"attribute": "catalog_inclusion_date",
|
{"attribute": "catalog_inclusion_date",
|
||||||
"label": "Date of inclusion in the catalogue"},
|
"label": "accessionDate"},
|
||||||
{"attribute": "collect.who", "label": "Collected by"},
|
{"attribute": "collect.who", "label": "collector"},
|
||||||
{"attribute": "collect.date", "label": "Date of collection"},
|
{"attribute": "collect.date", "label": "collectionDate"},
|
||||||
{"attribute": "isolation.who", "label": "Isolated by"},
|
{"attribute": "isolation.who", "label": "isolator"},
|
||||||
{"attribute": "isolation.date", "label": "Date of isolation"},
|
{"attribute": "isolation.date", "label": "isolationDate"},
|
||||||
{"attribute": "isolation.substrate_host_of_isolation",
|
{"attribute": "isolation.substrate_host_of_isolation",
|
||||||
"label": "Substrate/host of isolation"},
|
"label": "substrate"},
|
||||||
{"attribute": "growth.tested_temp_range",
|
{"attribute": "growth.tested_temp_range",
|
||||||
"label": "Tested temperature growth range"},
|
"label": "temperatureGrowthRange"},
|
||||||
{"attribute": "growth.recommended_temp",
|
{"attribute": "growth.recommended_temp",
|
||||||
"label": "Recommended growth temperature"},
|
"label": "recommendedTemperature"},
|
||||||
{"attribute": "growth.recommended_media",
|
{"attribute": "growth.recommended_media",
|
||||||
"label": "Recommended medium for growth"},
|
"label": "recommendedMedium"},
|
||||||
{"attribute": "form_of_supply", "label": "Form of supply"},
|
{"attribute": "form_of_supply", "label": "supplyForms"},
|
||||||
{"attribute": "other_denominations", "label": "Other denomination"},
|
{"attribute": "other_denominations", "label": "otherDenomination"},
|
||||||
{"attribute": "collect.location.coords",
|
{"attribute": "collect.location.coords",
|
||||||
"label": "Coordinates of geographic origin"},
|
"label": "geographicCoordinates"},
|
||||||
|
{"attribute": "collect.site.links",
|
||||||
|
"label": "siteLinks"},
|
||||||
{"attribute": "collect.location.altitude",
|
{"attribute": "collect.location.altitude",
|
||||||
"label": "Altitude of geographic origin"},
|
"label": "country"},
|
||||||
{"attribute": "collect.location", "label": "Geographic origin"},
|
{"attribute": "collect.location", "label": "geographicOrigin"},
|
||||||
{"attribute": "collect.habitat", "label": "Isolation habitat"},
|
{"attribute": "collect.habitat", "label": "isolationHabitat"},
|
||||||
{"attribute": "collect.habitat_ontobiotope",
|
{"attribute": "collect.habitat_ontobiotope",
|
||||||
"label": "Ontobiotope term for the isolation habitat"},
|
"label": "ontobiotopeTerms"},
|
||||||
{"attribute": "genetics.gmo", "label": "GMO"},
|
{"attribute": "genetics.gmo", "label": "gmo"},
|
||||||
{"attribute": "genetics.gmo_construction",
|
{"attribute": "genetics.gmo_construction",
|
||||||
"label": "GMO construction information"},
|
"label": "gmoConstruction"},
|
||||||
{"attribute": "genetics.mutant_info", "label": "Mutant information"},
|
{"attribute": "genetics.mutant_info", "label": "mutant"},
|
||||||
{"attribute": "genetics.genotype", "label": "Genotype"},
|
{"attribute": "genetics.genotype", "label": "genotype"},
|
||||||
{"attribute": "genetics.sexual_state", "label": "Sexual state"},
|
{"attribute": "genetics.sexual_state", "label": "sexualState"},
|
||||||
{"attribute": "genetics.ploidy", "label": "Ploidy"},
|
{"attribute": "genetics.ploidy", "label": "ploidy"},
|
||||||
{"attribute": "genetics.plasmids", "label": "Plasmids"},
|
{"attribute": "genetics.plasmids", "label": "plasmids"},
|
||||||
{"attribute": "genetics.plasmids_in_collections",
|
{"attribute": "genetics.plasmids_in_collections",
|
||||||
"label": "Plasmids collections fields"},
|
"label": "plasmidCollections"},
|
||||||
{"attribute": "publications", "label": "Literature"},
|
{"attribute": "publications", "label": "identificationLiterature"},
|
||||||
{"attribute": PLANT_PATHOGENICITY_CODE, "label": "Plant pathogenicity code"},
|
{"attribute": PLANT_PATHOGENICITY_CODE, "label": "Plant pathogenicity code"},
|
||||||
{"attribute": "pathogenicity", "label": "Pathogenicity"},
|
{"attribute": "pathogenicity", "label": "pathogenicity"},
|
||||||
{"attribute": "enzyme_production", "label": "Enzyme production"},
|
{"attribute": "enzyme_production", "label": "enzymes"},
|
||||||
{"attribute": "production_of_metabolites",
|
{"attribute": "production_of_metabolites",
|
||||||
"label": "Production of metabolites"},
|
"label": "metabolites"},
|
||||||
{"attribute": "applications", "label": "Applications", },
|
{"attribute": "type",
|
||||||
{"attribute": "remarks", "label": "Remarks"},
|
"label": "type"},
|
||||||
|
{"attribute": "applications", "label": "applications", },
|
||||||
|
{"attribute": "remarks", "label": "remarks"},
|
||||||
{"attribute": LITERATURE_LINKED_TO_SEQ_GENOME,
|
{"attribute": LITERATURE_LINKED_TO_SEQ_GENOME,
|
||||||
"label": "Literature linked to the sequence/genome"},
|
"label": "sequenceLiterature"},
|
||||||
]
|
]
|
||||||
|
|
||||||
ALLOWED_SUBTAXA = ["subspecies", "variety", "convarietas", "group", "forma",
|
ALLOWED_SUBTAXA = ["subspecies", "variety", "convarietas", "group", "forma",
|
||||||
@ -228,8 +240,9 @@ ALLOWED_MARKER_TYPES = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
PUBLICATIONS = "publications"
|
PUBLICATIONS = "publications"
|
||||||
PUB_ID = "id"
|
PUB_ID = "pub_id"
|
||||||
PUB_DOI = "pub_doi"
|
PUB_DOI = "pub_doi"
|
||||||
|
PUB_PMID = "pub_pmid"
|
||||||
PUB_PUBMED_ID = ''
|
PUB_PUBMED_ID = ''
|
||||||
PUB_FULL_REFERENCE = "full_reference"
|
PUB_FULL_REFERENCE = "full_reference"
|
||||||
PUB_TITLE = "title"
|
PUB_TITLE = "title"
|
||||||
@ -247,6 +260,8 @@ BOOK_PUBLISHER = "book_publisher"
|
|||||||
|
|
||||||
PUBLICATION_FIELDS = [
|
PUBLICATION_FIELDS = [
|
||||||
{"label": "ID", "attribute": PUB_ID},
|
{"label": "ID", "attribute": PUB_ID},
|
||||||
|
{"label": "PMID", "attribute": PUB_PMID},
|
||||||
|
{"label": "DOI", "attribute": PUB_DOI},
|
||||||
{"label": "Full reference", "attribute": PUB_FULL_REFERENCE},
|
{"label": "Full reference", "attribute": PUB_FULL_REFERENCE},
|
||||||
{"label": "Authors", "attribute": PUB_AUTHORS},
|
{"label": "Authors", "attribute": PUB_AUTHORS},
|
||||||
{"label": "Title", "attribute": PUB_TITLE},
|
{"label": "Title", "attribute": PUB_TITLE},
|
||||||
@ -282,15 +297,43 @@ SUBTAXAS = {
|
|||||||
"f.sp.": "forma.specialis"
|
"f.sp.": "forma.specialis"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#Control
|
||||||
|
VERSION = "Version"
|
||||||
|
DATE = "Date"
|
||||||
|
|
||||||
|
|
||||||
|
# Country codes
COUNTRY = "Country"
CODE = "Code"
ADDITIONAL_INFORMATION_ON_THE_COUNTRY_OR_CODE = "Additional information on the country or code"

# Label/attribute pairs for the country codes data.
# Named *_FIELDS (like CONTROL_FIELDS and PUBLICATION_FIELDS) because the name
# COUNTRY_CODES_SHEET is assigned the sheet title string further down in this
# module; binding the list to that same name made the list unreachable once
# the module finished loading.
COUNTRY_CODES_FIELDS = [
    {"label": "Country", "attribute": COUNTRY},
    {"label": "Code", "attribute": CODE},
    {"label": "Additional information on the country or code",
     "attribute": ADDITIONAL_INFORMATION_ON_THE_COUNTRY_OR_CODE},
]
|
||||||
|
|
||||||
|
|
||||||
|
# Control sheet fields
|
||||||
|
CONTROL_FIELDS = [
|
||||||
|
{"label": "Version", "attribute": VERSION},
|
||||||
|
{"label": "Date", "attribute": DATE},
|
||||||
|
]
|
||||||
|
|
||||||
# Excel sheet name
|
# Excel sheet name
|
||||||
LOCATIONS = "Geographic origin" # 'Locations'
|
LOCATIONS = "Geographic origin" # 'Locations'
|
||||||
GROWTH_MEDIA = "Growth media"
|
GROWTH_MEDIA = "Growth media"
|
||||||
GENOMIC_INFO = "Genomic information"
|
GENOMIC_INFO = "Genomic information"
|
||||||
STRAINS = "Strains"
|
STRAINS = "Strains"
|
||||||
LITERATURE_SHEET = "Literature"
|
LITERATURE_SHEET = "Literature"
|
||||||
SEXUAL_STATE_SHEET = "Sexual states"
|
SEXUAL_STATE_SHEET = "Sexual state"
|
||||||
RESOURCE_TYPES_VALUES = "Resource types values"
|
RESOURCE_TYPES_VALUES = "Resource types values"
|
||||||
FORM_OF_SUPPLY_SHEET = "Forms of supply"
|
FORM_OF_SUPPLY_SHEET = "Forms of supply"
|
||||||
PLOIDY_SHEET = "Ploidy"
|
PLOIDY_SHEET = "Ploidy"
|
||||||
ONTOBIOTOPE = "Ontobiotope"
|
ONTOBIOTOPE = "Ontobiotope"
|
||||||
MARKERS = "Markers"
|
MARKERS = "Markers"
|
||||||
|
CONTROL_SHEET = "Version"
|
||||||
|
COUNTRY_CODES_SHEET = "Country codes"
|
||||||
|
RESOURCE_SHEET = 'Resource types values'
|
||||||
|
|||||||
@ -1,50 +0,0 @@
|
|||||||
from mirri import rgetattr
|
|
||||||
|
|
||||||
|
|
||||||
def validate_strain(strain, version='20200601'):
|
|
||||||
if version == '20200601':
|
|
||||||
return _validate_strain_v20200601(strain)
|
|
||||||
raise NotImplementedError('Only v20200601 is implemented')
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_strain_v20200601(strain):
|
|
||||||
mandatory_attrs = [{'label': 'Accession Number', 'attr': 'id.strain_id'},
|
|
||||||
{'label': 'Nagoya protocol', 'attr': 'nagoya_protocol'},
|
|
||||||
{'label': 'Restriction on use', 'attr': 'restriction_on_use'},
|
|
||||||
{'label': 'Risk group', 'attr': 'risk_group'},
|
|
||||||
{'label': 'Organism type', 'attr': 'taxonomy.organism_type'},
|
|
||||||
{'label': 'Taxon name', 'attr': 'taxonomy.long_name'},
|
|
||||||
{'label': 'Recommended temperature to growth', 'attr': 'growth.recommended_temp'},
|
|
||||||
{'label': 'Recommended media', 'attr': 'growth.recommended_media'},
|
|
||||||
{'label': 'Form of supply', 'attr': 'form_of_supply'},
|
|
||||||
{'label': 'Country', 'attr': 'collect.location.country'}]
|
|
||||||
|
|
||||||
errors = []
|
|
||||||
|
|
||||||
for mandatory in mandatory_attrs:
|
|
||||||
value = rgetattr(strain, mandatory['attr'])
|
|
||||||
if value is None:
|
|
||||||
errors.append(f"{mandatory['label']} is mandatory field")
|
|
||||||
|
|
||||||
if not is_valid_nagoya(strain):
|
|
||||||
errors.append('Not compliant wih nagoya protocol requirements')
|
|
||||||
|
|
||||||
return errors
|
|
||||||
|
|
||||||
|
|
||||||
def is_valid_nagoya(strain):
|
|
||||||
# nagoya_requirements
|
|
||||||
_date = strain.collect.date
|
|
||||||
if _date is None:
|
|
||||||
_date = strain.isolation.date
|
|
||||||
if _date is None:
|
|
||||||
_date = strain.deposit.date
|
|
||||||
if _date is None:
|
|
||||||
_date = strain.catalog_inclusion_date
|
|
||||||
# print(_date)
|
|
||||||
year = None if _date is None else _date._year
|
|
||||||
|
|
||||||
if year is not None and year >= 2014 and strain.collect.location.country is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
@ -62,6 +62,10 @@ class Entity():
|
|||||||
def GID(self) -> str:
|
def GID(self) -> str:
|
||||||
return 'Genomic Information'
|
return 'Genomic Information'
|
||||||
|
|
||||||
|
|
||||||
|
def VRS(self) -> str:
|
||||||
|
return 'Version'
|
||||||
|
|
||||||
def OTD(self) -> str:
|
def OTD(self) -> str:
|
||||||
return 'Ontobiotope'
|
return 'Ontobiotope'
|
||||||
|
|
||||||
|
|||||||
@ -92,6 +92,9 @@ class ErrorMessage():
|
|||||||
|
|
||||||
def EFS08(self):
|
def EFS08(self):
|
||||||
return "The 'Genomic information' sheet is missing. Please check the provided excel template."
|
return "The 'Genomic information' sheet is missing. Please check the provided excel template."
|
||||||
|
|
||||||
|
def EFS09(self):
|
||||||
|
return "The 'Version' sheet is missing. Please check the provided excel template."
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Growth Media Error Codes
|
Growth Media Error Codes
|
||||||
@ -147,26 +150,26 @@ class ErrorMessage():
|
|||||||
def LID03(self):
|
def LID03(self):
|
||||||
return "The 'Full reference' column is a mandatory field in the Literature sheet. The column can not be empty."
|
return "The 'Full reference' column is a mandatory field in the Literature sheet. The column can not be empty."
|
||||||
|
|
||||||
def LID04(self):
|
#def LID04(self):
|
||||||
return f"The 'Full reference' for literature with ID {self.pk} is missing."
|
#return f"The 'Full reference' for literature with ID {self.pk} is missing."
|
||||||
|
|
||||||
def LID05(self):
|
def LID05(self):
|
||||||
return "The 'Authors' column is a mandatory field in the Literature sheet. The column can not be empty."
|
return "The 'Authors' column is a mandatory field in the Literature sheet. The column can not be empty."
|
||||||
|
|
||||||
def LID06(self):
|
#def LID06(self):
|
||||||
return f"The 'Authors' for literature with ID {self.pk} is missing."
|
#return f"The 'Authors' for literature with ID {self.pk} is missing."
|
||||||
|
|
||||||
def LID07(self):
|
def LID07(self):
|
||||||
return "The 'Title' column is a mandatory field in the Literature sheet. The column can not be empty."
|
return "The 'Title' column is a mandatory field in the Literature sheet. The column can not be empty."
|
||||||
|
|
||||||
def LID08(self):
|
#def LID08(self):
|
||||||
return f"The 'Title' for literature with ID {self.pk} is missing."
|
#return f"The 'Title' for literature with ID {self.pk} is missing."
|
||||||
|
|
||||||
def LID09(self):
|
def LID09(self):
|
||||||
return "The 'Journal' column is a mandatory field in the Literature sheet. The column can not be empty."
|
return "The 'Journal' column is a mandatory field in the Literature sheet. The column can not be empty."
|
||||||
|
|
||||||
def LID10(self):
|
#def LID10(self):
|
||||||
return f"The 'Journal' for literature with ID {self.pk} is missing."
|
#return f"The 'Journal' for literature with ID {self.pk} is missing."
|
||||||
|
|
||||||
def LID11(self):
|
def LID11(self):
|
||||||
return "The 'Year' column is a mandatory field in the Literature sheet. The column can not be empty."
|
return "The 'Year' column is a mandatory field in the Literature sheet. The column can not be empty."
|
||||||
@ -187,167 +190,191 @@ class ErrorMessage():
|
|||||||
return f"The 'First page' for literature with ID {self.pk} is missing."
|
return f"The 'First page' for literature with ID {self.pk} is missing."
|
||||||
|
|
||||||
def LID17(self):
    """Return the message listing the accepted ways to fill in the 'Literature' sheet.

    Returns:
        str: A single message. Adjacent string literals are concatenated; the
        earlier form separated them with commas, which made the method return
        a tuple of six strings instead of one string like the other messages.
    """
    return (
        "There are four types of ways to fill in the 'Literature' sheet. "
        "1st- Columns 'ID' and 'DOI' are mandatory. "
        "2nd- Columns 'ID' and 'PMID' are mandatory. "
        "3rd- Columns 'ID' and 'Full reference' are mandatory. "
        "In the alternative of these three types of forms not being filled in, we have: "
        "4th- Columns 'ID', 'Authors', 'Title', 'Journal', 'Year', 'Volume', 'First page'."
    )
|
||||||
|
|
||||||
|
def LID18(self):
|
||||||
|
return "The 'PMID' column is a mandatory field. The column can not be empty."
|
||||||
|
|
||||||
|
#def LID19(self):
|
||||||
|
#return f"PMID for literature with ID {self.pk} is missing."
|
||||||
|
|
||||||
|
def LID20(self):
|
||||||
|
return "The 'DOI' column is a mandatory field. The column can not be empty."
|
||||||
|
|
||||||
|
#def LID21(self):
|
||||||
|
#return f"DOI for literature with ID {self.pk} is missing."
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Strains Error Codes
|
Strains Error Codes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def STD01(self):
|
def STD01(self):
|
||||||
return "The 'Accession number' column is a mandatory field in the Strains sheet."
|
return "The 'accessionNumber' column is a mandatory field in the Strains sheet."
|
||||||
|
|
||||||
def STD02(self):
|
def STD02(self):
|
||||||
return "The 'Accession number' column is empty or has missing values."
|
return "The 'accessionNumber' column is empty or has missing values."
|
||||||
|
|
||||||
def STD03(self):
|
def STD03(self):
|
||||||
return f"The 'Accesion number' must be unique. The '{self.value}' is repeated."
|
return f"The 'accessionNumber' must be unique. The '{self.value}' is repeated."
|
||||||
|
|
||||||
def STD04(self):
|
def STD04(self):
|
||||||
return (f"The 'Accession number' {self.pk} is not according to the specification."
|
return (f"The 'accessionNumber' {self.pk} is not according to the specification."
|
||||||
" The value must be of the format '<Sequence of characters> <sequence of characters>'.")
|
" The value must be of the format '<Sequence of characters> <sequence of characters>'.")
|
||||||
|
|
||||||
def STD05(self):
|
def STD05(self):
|
||||||
return f"The 'Restriction on use' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
return f"The 'useRestrictions' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
def STD06(self):
|
def STD06(self):
|
||||||
return f"The 'Restriction on use' for strain with Accession Number {self.pk} is missing."
|
return f"The 'useRestrictions' for strain with accessionNumber {self.pk} is missing."
|
||||||
|
|
||||||
def STD07(self):
|
def STD07(self):
|
||||||
return (f"The 'Restriction on use' for strain with Accession Number {self.pk} is not according to the specification."
|
return (f"The 'useRestrictions' for strain with accessionNumber {self.pk} is not according to the specification."
|
||||||
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
def STD08(self):
|
def STD08(self):
|
||||||
return f"The 'Nagoya protocol restrictions and compliance conditions' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
return f"The 'nagoyaConditions' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
def STD09(self):
|
def STD09(self):
|
||||||
return f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is missing."
|
return f"The 'nagoyaConditions' for strain with accessionNumber {self.pk} is missing."
|
||||||
|
|
||||||
def STD10(self):
|
def STD10(self):
|
||||||
return (f"The 'Nagoya protocol restrictions and compliance conditions' for strain with Accession Number {self.pk} is not according to the specification."
|
return (f"The 'nagoyaConditions' for strain with accessionNumber {self.pk} is not according to the specification."
|
||||||
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
def STD11(self):
|
def STD11(self):
|
||||||
return (f"The 'Strain from a registered collection' for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The 'registeredCollection' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
f" Your value is {self.value} and the accepted values are 1, 2, 3.")
|
||||||
|
|
||||||
def STD12(self):
|
def STD12(self):
|
||||||
return "The 'Risk group' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
return "The 'riskGroup' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
def STD13(self):
|
def STD13(self):
|
||||||
return f"The 'Risk group' for strain with Accession Number {self.pk} is missing."
|
return f"The 'riskGroup' for strain with accessionNumber {self.pk} is missing."
|
||||||
|
|
||||||
def STD14(self):
|
def STD14(self):
|
||||||
return (f"The 'Risk group' for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The 'riskGroup' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 1, 2, 3, 4.")
|
f" Your value is {self.value} and the accepted values are 1, 2, 3, 4.")
|
||||||
|
|
||||||
def STD15(self):
|
def STD15(self):
|
||||||
return (f"The 'Dual use' for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The 'dualUse' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 1, 2.")
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
def STD16(self):
|
def STD16(self):
|
||||||
return (f"The “Quarantine in europe” for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The “euQuarantine” for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 1, 2.")
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
def STD17(self):
|
def STD17(self):
|
||||||
return f"The 'Organism type' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
return f"The 'organismType' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
def STD18(self):
|
def STD18(self):
|
||||||
return f"The 'Organism type' for strain with Accession Number {self.pk} is missing."
|
return f"The 'organismType' for strain with accessionNumber {self.pk} is missing."
|
||||||
|
|
||||||
def STD19(self):
|
def STD19(self):
|
||||||
return (f"The 'Organism type' for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The 'organismType' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 'Algae', 'Archaea', 'Bacteria', 'Cyanobacteria', "
|
f" Your value is {self.value} and the accepted values are 'Algae', 'Archaea', 'Bacteria', 'Cyanobacteria', "
|
||||||
"'Filamentous Fungi', 'Phage', 'Plasmid', 'Virus', 'Yeast', 1, 2, 3, 4, 5, 6, 7, 8, 9.")
|
"'Filamentous Fungi', 'Phage', 'Plasmid', 'Virus', 'Yeast', 1, 2, 3, 4, 5, 6, 7, 8, 9.")
|
||||||
|
|
||||||
def STD20(self):
|
def STD20(self):
|
||||||
return f"The 'Taxon name' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
return f"The 'speciesName' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
def STD21(self):
|
def STD21(self):
|
||||||
return f"The 'Taxon name' for strain with Accession Number {self.pk} is missing."
|
return f"The 'speciesName' for strain with accessionNumber {self.pk} is missing."
|
||||||
|
|
||||||
def STD22(self):
|
def STD22(self):
|
||||||
return f"The 'Taxon name' for strain with Accession Number {self.pk} is incorrect."
|
return f"The 'speciesName' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
|
|
||||||
def STD23(self):
|
def STD23(self):
|
||||||
return (f"The 'Interspecific hybrid' for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The 'hybrid' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 1, 2.")
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
def STD24(self):
|
def STD24(self):
|
||||||
return f"The 'History of deposit' for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'depositHistory' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
|
"The field includes entries separated by '<' meaning 'received from'."
|
||||||
|
"Entries may include persons or CCs. The name of the CC should be followed by"
|
||||||
|
"the month, when available, and year of the acquisition. Between parentheses,"
|
||||||
|
"the strain designation or CC numbers and/or a name can also be entered when "
|
||||||
|
"a name change has occurred.")
|
||||||
|
|
||||||
def STD25(self):
|
def STD25(self):
|
||||||
return (f"The 'Date of deposit' for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'depositDate' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
def STD26(self):
|
def STD26(self):
|
||||||
return (f"The 'Date of inclusion in the catalogue' for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'accessionDate' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
def STD27(self):
|
def STD27(self):
|
||||||
return (f"The 'Date of collection' for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'collectionDate' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
def STD28(self):
|
def STD28(self):
|
||||||
return (f"The 'Date of isolation' for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'isolationDate' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
" The allowed formats are 'YYYY-MM-DD', 'YYYYMMDD', 'YYYYMM', and 'YYYY'.")
|
||||||
|
|
||||||
def STD29(self):
|
def STD29(self):
|
||||||
return (f"The 'Tested temperature growth range' for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'temperatureGrowthRange' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
" It must have two decimal numbers separated by ','")
|
" It must have two decimal numbers separated by ','")
|
||||||
|
|
||||||
def STD30(self):
|
def STD30(self):
|
||||||
return f"The 'Recommended growth temperature' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
return f"The 'temperatureGrowthRange' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
def STD31(self):
|
def STD31(self):
|
||||||
return f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is missing."
|
return f"The 'temperatureGrowthRange' for strain with accessionNumber {self.pk} is missing."
|
||||||
|
|
||||||
def STD32(self):
|
def STD32(self):
|
||||||
return (f"The 'Recommended growth temperature' for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'temperatureGrowthRange' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
" It must have two decimal numbers separated by ','.")
|
" It must have two decimal numbers separated by ','.")
|
||||||
|
|
||||||
def STD33(self):
|
def STD33(self):
|
||||||
return f"The 'Recommended medium for growth' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
return ("The 'recommendedTemperature' column is a mandatory field in the Strains Sheet. The column can not be empty.")
|
||||||
|
|
||||||
def STD34(self):
|
def STD34(self):
|
||||||
return f"The 'Recommended medium for growth' for strain with Accession Number {self.pk} is missing."
|
return f"The 'recommendedTemperature' for strain with accessionNumber {self.pk} is missing."
|
||||||
|
|
||||||
def STD35(self):
|
def STD35(self):
|
||||||
return f"The value of 'Recommended medium for growth' for strain with Accession Number {self.pk} is not in the Growth Media Sheet."
|
return f"The value of 'recommendedTemperature' for strain with accessionNumber {self.pk} is not in the Growth Media Sheet."
|
||||||
|
|
||||||
def STD36(self):
|
def STD36(self):
|
||||||
return f"The 'Forms of supply' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
return f"The 'supplyForms' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
def STD37(self):
|
def STD37(self):
|
||||||
return f"The 'Forms of supply' for strain with Accession Number {self.pk} is missing."
|
return f"The 'supplyForms' for strain with accessionNumber {self.pk} is missing."
|
||||||
|
|
||||||
def STD38(self):
|
def STD38(self):
|
||||||
return f"The value of 'Forms of supply' for strain with Accession Number {self.pk} is not in the Forms of Supply Sheet."
|
return f"The value of 'supplyForms' for strain with accessionNumber {self.pk} is not in the Forms of Supply Sheet."
|
||||||
|
|
||||||
def STD39(self):
|
def STD39(self):
|
||||||
return (f"The 'Coordinates of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'geographicCoordinates' column for strain with accessionNumber {self.pk} is incorrect."
|
||||||
"The allowed formats are two or three decimal numbers separated by ','. Moreover, the first number must be"
|
"The allowed formats are two, three or four decimal numbers separated by ','. Moreover, the first number must be."
|
||||||
"between [-90, 90], the second between [-180, 180], and the third, if provided, can assume any value.")
|
"between [-90, 90], the second between [-180, 180], and the third and fourth refers to the precision and altitude, defined by decimal numbers."
|
||||||
|
"Put a question mark for lack of precision or altitude when one of them is missing. Leave the values blank when both are missing. ")
|
||||||
|
|
||||||
def STD40(self):
|
def STD40(self):
|
||||||
return (f"The 'Altitude of geographic origin' column for strain with Accession Number {self.pk} is incorrect."
|
return (f"The 'country' column for strain with accessionNumber {self.pk} is incorrect."
|
||||||
"The allowed formats are one decimal number between [-200, 8000].")
|
"The allowed formats are one decimal number between [-200, 8000].")
|
||||||
|
def STD54(self):
    """Return the error text stating that 'country' is a mandatory Strains Sheet column."""
    # The original literal ran the quoted column name into the next word
    # ("'country'column"); the separating space is restored here.
    return "The 'country' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
def STD55(self):
|
||||||
|
return (f"The 'country' for strain with accessionNumber {self.pk} is missing.")
|
||||||
|
|
||||||
def STD41(self):
|
def STD41(self):
|
||||||
return f"The value of 'Ontobiotope term for the isolation habitat' for strain with Accession Number {self.pk} is not in the Ontobiotope Sheet."
|
return f"The value of 'ontobiotopeTerms' for strain with accessionNumber {self.pk} is not in the Ontobiotope Sheet."
|
||||||
|
|
||||||
def STD42(self):
|
def STD42(self):
|
||||||
return (f"The 'GMO' for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The 'gmo' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 1, 2")
|
f" Your value is {self.value} and the accepted values are 1, 2")
|
||||||
|
|
||||||
def STD43(self):
|
def STD43(self):
|
||||||
return (f"The 'Sexual State' for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The 'sexualState' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 'Mata', 'Matalpha', 'Mata/Matalpha', "
|
f" Your value is {self.value} and the accepted values are 'Mata', 'Matalpha', 'Mata/Matalpha', "
|
||||||
"'Matb', 'Mata/Matb', 'MTLa', 'MTLalpha', 'MTLa/MTLalpha', 'MAT1-1', 'MAT1-2', 'MAT1', 'MAT2', 'MT+', 'MT-'")
|
"'Matb', 'Mata/Matb', 'MTLa', 'MTLalpha', 'MTLa/MTLalpha', 'MAT1-1', 'MAT1-2', 'MAT1', 'MAT2', 'MT+', 'MT-'")
|
||||||
|
|
||||||
def STD44(self):
|
def STD44(self):
|
||||||
return (f"The 'Ploidy' for strain with Accession Number {self.pk} is not according to specification."
|
return (f"The 'ploidy' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" Your value is {self.value} and the accepted values are 0, 1, 2, 3, 4, 9")
|
f" Your value is {self.value} and the accepted values are 0, 1, 2, 3, 4, 9")
|
||||||
|
|
||||||
def STD45(self):
|
def STD45(self):
|
||||||
@ -356,24 +383,97 @@ class ErrorMessage():
|
|||||||
return msg
|
return msg
|
||||||
|
|
||||||
def STD46(self):
|
def STD46(self):
|
||||||
msg = f"If date of collection/isolation/deposit/inclusion in the catalog is after 2014," \
|
return (f"The 'geographicOrigin' for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
f" the value of column Geographic Origin must be provided and associated with a country in the " \
|
f"The 'geographicOrigin' column must consist of the ID's associated with the Geographic origin sheet.")
|
||||||
f"Geographic Origin sheet. The value is missing or not associated with a country for strain {self.pk}."
|
|
||||||
return msg
|
|
||||||
|
|
||||||
|
def STD47(self):
|
||||||
|
return "The 'country' column is a mandatory field in the Strains sheet."
|
||||||
|
|
||||||
|
def STD48(self):
|
||||||
|
return "The 'country' column is empty or has missing values."
|
||||||
|
|
||||||
|
def STD49(self):
|
||||||
|
return (f"The “qps” for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
|
def STD50(self):
|
||||||
|
return (f"The “axenicCulture” for strain with accessionNumber {self.pk} is not according to specification."
|
||||||
|
f" Your value is {self.value} and the accepted values are 'Axenic', 'Not axenic'.")
|
||||||
|
|
||||||
|
def STD51(self):
|
||||||
|
return f"The 'mirriAccessionNumber' must be unique. The '{self.pk}' is repeated."
|
||||||
|
|
||||||
|
def STD52(self):
|
||||||
|
return (f"The 'mirriAccessionNumber' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
|
" It must have the expression MIRRI followed by 7 digits")
|
||||||
|
|
||||||
|
def STD53(self):
|
||||||
|
return (f"The 'siteLinks' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
|
" The displayed expression it should be composed of: site name ';' website url." )
|
||||||
|
|
||||||
|
def STD56(self):
|
||||||
|
return (f"The 'siteLinks' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
|
" The url must be valid. " )
|
||||||
|
def STD57(self):
    """Return the error text for a strain whose 'country' value is not an accepted code.

    Reads ``self.pk``, which the message presents as the strain's accessionNumber.
    """
    # Adjacent string literals are concatenated verbatim, so every fragment
    # that ends mid-sentence must carry its own trailing space. The one
    # exception is "ISO 3166-" + "1 alpha-3", which is a single token split
    # across two lines and must stay glued together.
    return (f"The 'country' for strain with accessionNumber {self.pk} is incorrect. "
            "This information must be expressed by using the ISO-3166 standard for country "
            "codes. The preferred set is ISO 3166-1 alpha-2 (two letters code), but ISO 3166-"
            "1 alpha-3 (three letters code) is also accepted. Former country codes must "
            "follow standard’s part three ISO 3166-3 (four letters code). Only one code can "
            "be included.")
|
||||||
|
def STD58(self):
|
||||||
|
return (f"The 'mtaFile' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
|
" The url must be valid. " )
|
||||||
|
def STD59(self):
    """Return the error text for a strain whose 'absFile' value is malformed.

    Reads ``self.pk``, which the message presents as the strain's accessionNumber.
    """
    # Trailing spaces added to each fragment: the original concatenation
    # produced "incorrect.The", "url.When" and "URLwill".
    return (f"The 'absFile' for strain with accessionNumber {self.pk} is incorrect. "
            "The displayed expression it should be composed of: name ';' website url. "
            "When only one URL is provided, the title may be omitted. In this case, the URL "
            "will be shown in clear to users.")
|
||||||
|
def STD60(self):
|
||||||
|
return (f"The 'absFile' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
|
" The url must be valid. ")
|
||||||
|
def STD61(self):
    """Return the error text for a strain whose 'sequenceLiterature' value is malformed.

    Reads ``self.pk``, which the message presents as the strain's accessionNumber.
    """
    # Space added after "incorrect." so the two sentences no longer run together.
    return (f"The 'sequenceLiterature' for strain with accessionNumber {self.pk} is incorrect. "
            "Numeric identifiers separated by a semicolon ';'.")
|
||||||
|
|
||||||
|
def STD62(self):
    """Return the error text for a strain whose 'plasmidCollections' value is malformed.

    Reads ``self.pk``, which the message presents as the strain's accessionNumber.
    """
    # Fragment boundaries now carry spaces (the original produced
    # "inparentheses", "text.CC" and "numberseparated"), and the stray,
    # unbalanced apostrophe after "space" is dropped.
    # NOTE(review): the final sentence about numeric identifiers is kept
    # verbatim; it looks like it may have been copied from the
    # 'sequenceLiterature' message -- confirm against the specification.
    return (f"The 'plasmidCollections' for strain with accessionNumber {self.pk} is incorrect. "
            "It should include the name of the plasmid followed by the CC number in "
            "parentheses. More than one plasmid can be reported, separated by ';'. "
            "Plasmid names should be provided as free text. "
            "CC numbers should be composed by the CC acronym followed by a number "
            "separated by a space. Numeric identifiers separated by a semicolon ';'.")
|
||||||
|
|
||||||
|
def STD63(self):
|
||||||
|
return (f"The 'otherCollectionNumbers' for strain with accessionNumber {self.pk} is incorrect."
|
||||||
|
" The value must be of the format '<Sequence of characters> <sequence of characters>'.")
|
||||||
|
|
||||||
|
def STD64(self):
    """Return the error text for a strain whose 'type' value is not an accepted choice.

    Reads ``self.pk`` (presented as the accessionNumber) and ``self.value``
    (the offending cell content echoed back to the user).
    """
    # Space added after "incorrect." so the two sentences no longer run together.
    return (f"The 'type' for strain with accessionNumber {self.pk} is incorrect. "
            f"Your value is {self.value} and the accepted values are 1, 2.")
|
||||||
|
|
||||||
|
def STD65(self):
    """Return the error text for a strain whose 'status' value is malformed.

    Reads ``self.pk``, which the message presents as the strain's accessionNumber.
    """
    # Two fixes to the emitted text: a space after "incorrect.", and the
    # closing quote that was missing around the expected structure.
    return (f"The 'status' for strain with accessionNumber {self.pk} is incorrect. "
            "The structure should be 'type of <character string>'.")
|
||||||
|
|
||||||
|
def STD68(self):
    """Return the error text stating that 'geographicOrigin' is a mandatory Strains Sheet column."""
    # The original literal ran the quoted column name into the next word
    # ("'geographicOrigin'column"); the separating space is restored here.
    return "The 'geographicOrigin' column is a mandatory field in the Strains Sheet. The column can not be empty."
|
||||||
|
|
||||||
|
def STD69(self):
|
||||||
|
return (f"The 'geographicOrigin' for strain with accessionNumber {self.pk} is missing.")
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Genomic Information Error Codes
|
Genomic Information Error Codes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def GID01(self):
|
def GID01(self):
|
||||||
return f"The 'Strain Acession Number' (Strain AN) column is a mandatory field in the Genomic Information Sheet."
|
return f"The 'Strain accessionNumber' (Strain AN) column is a mandatory field in the Genomic Information Sheet."
|
||||||
|
|
||||||
def GID02(self):
|
def GID02(self):
|
||||||
return f"The 'Strain Acession Number' (Strain AN) column is empty or has missing values."
|
return f"The 'Strain accessionNumber' (Strain AN) column is empty or has missing values."
|
||||||
|
|
||||||
def GID03(self):
|
def GID03(self):
|
||||||
return f"The value of 'Strain Acession Number' (Strain AN) {self.value} is not in the Strains sheet."
|
return f"The value of 'Strain accessionNumber' (Strain AN) {self.value} is not in the Strains sheet."
|
||||||
|
|
||||||
def GID04(self):
|
def GID04(self):
|
||||||
return f"The 'Marker' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."
|
return f"The 'Marker' column is a mandatory field in the Genomic Information Sheet. The column can not be empty."
|
||||||
@ -397,6 +497,35 @@ class ErrorMessage():
|
|||||||
return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect."
|
return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect."
|
||||||
" It must be a sequence of 'G', 'T', 'A', 'C' characteres of any length and without white spaces.")
|
" It must be a sequence of 'G', 'T', 'A', 'C' characteres of any length and without white spaces.")
|
||||||
|
|
||||||
|
def GID11(self):
    """Return the error text for a genomic-information row whose 'Sequence' is not a valid accession code.

    Reads ``self.pk``, which the message presents as the row's Strain AN.
    """
    # Trailing spaces added to each fragment: the original concatenation
    # produced "incorrect.An", "alphanumericcode", "digits,without" and
    # "twoletters".
    return (f"The 'Sequence' for genomic information with Strain AN {self.pk} is incorrect. "
            "An INSDC accession number is an alphanumeric "
            "code made by a fixed number of letters followed by a fixed number of digits, "
            "without any separation. For sequences, the code is currently made of two "
            "letters followed by six numbers.")
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Version Error Codes
|
||||||
|
"""
|
||||||
|
|
||||||
|
def VRS01(self):
|
||||||
|
return "The 'Version' columns is a mandatory field in the Version Sheet."
|
||||||
|
|
||||||
|
def VRS02(self):
|
||||||
|
return "The 'Version' columns is empty or has missing values."
|
||||||
|
|
||||||
|
def VRS03(self):
|
||||||
|
return "The 'Date' columns is a mandatory field in the Control Sheet."
|
||||||
|
|
||||||
|
def VRS04(self):
|
||||||
|
return "The 'Date' columns is empty or has missing values."
|
||||||
|
|
||||||
|
def VRS05(self):
|
||||||
|
return f"The version {self.value} is the only one to be used."
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Ontobiotope Error Codes
|
Ontobiotope Error Codes
|
||||||
"""
|
"""
|
||||||
@ -407,8 +536,12 @@ class ErrorMessage():
|
|||||||
def OTD02(self):
|
def OTD02(self):
|
||||||
return "The 'ID' columns is empty or has missing values."
|
return "The 'ID' columns is empty or has missing values."
|
||||||
|
|
||||||
def OTD03(self):
|
#def OTD03(self):
|
||||||
return "The 'Name' columns is a mandatory field in the Ontobiotope Sheet. The column can not be empty."
|
return "The 'Name' columns is a mandatory field in the Ontobiotope Sheet. The column can not be empty."
|
||||||
|
|
||||||
def OTD04(self):
|
#def OTD04(self):
|
||||||
return f"The 'Name' for ontobiotope with ID {self.pk} is missing."
|
return f"The 'Name' for ontobiotope with ID {self.pk} is missing."
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -4,27 +4,51 @@ from io import BytesIO
|
|||||||
from zipfile import BadZipfile
|
from zipfile import BadZipfile
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from calendar import monthrange
|
from calendar import monthrange
|
||||||
|
import requests
|
||||||
from openpyxl import load_workbook
|
from openpyxl import load_workbook
|
||||||
|
import pycountry
|
||||||
|
|
||||||
from mirri.io.parsers.excel import workbook_sheet_reader, get_all_cell_data_from_sheet
|
from mirri.io.parsers.excel import workbook_sheet_reader, get_all_cell_data_from_sheet
|
||||||
from mirri.validation.error_logging import ErrorLog, Error
|
from mirri.validation.error_logging import ErrorLog, Error
|
||||||
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
|
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
|
||||||
ERROR_CODE, FIELD, MANDATORY, MATCH,
|
ERROR_CODE, FIELD, MANDATORY, MATCH,
|
||||||
MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON,
|
MISSING, MULTIPLE, NAGOYA, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON,
|
||||||
TYPE, UNIQUE, VALIDATION, VALUES, BIBLIO)
|
TYPE, UNIQUE, VALIDATION, VALUES, BIBLIO, DOMINIO,URL_DOMINIO, ISO, URL_TITLE,JUST_URL,TITLE,
|
||||||
|
HISTORY,NAGOYA1, VERSION)
|
||||||
from mirri.settings import LOCATIONS, SUBTAXAS
|
from mirri.settings import LOCATIONS, SUBTAXAS
|
||||||
from mirri.validation.validation_conf_20200601 import MIRRI_20200601_VALLIDATION_CONF
|
from mirri.validation.validation_conf_12052023 import version_config
|
||||||
|
|
||||||
|
from mirri.validation.validation_conf_12052023 import MIRRI_12052023_VALLIDATION_CONF
|
||||||
|
|
||||||
|
|
||||||
def validate_mirri_excel(fhand, version="20200601"):
|
def validate_mirri_excel(fhand, version= "5.1.2" ):
|
||||||
if version == "20200601":
|
if version == "5.1.2":
|
||||||
configuration = MIRRI_20200601_VALLIDATION_CONF
|
configuration = MIRRI_12052023_VALLIDATION_CONF
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Only version20200601 is implemented")
|
raise NotImplementedError("Only version 5.1.2 is implemented")
|
||||||
|
|
||||||
return validate_excel(fhand, configuration)
|
return validate_excel(fhand, configuration)
|
||||||
|
|
||||||
|
def version(value, validation_conf=None):
    """Check whether *value* is one of the entries of ``version_config``.

    Args:
        value: Cell content to check. ``None`` (an empty cell) is accepted.
        validation_conf: Unused; kept so the signature matches the other
            cell validators.

    Returns:
        bool: ``True`` when *value* is ``None`` or compares equal to an entry
        obtained by iterating ``version_config``; ``False`` otherwise.
    """
    if value is None:
        return True
    try:
        # The original loop reused the name ``version`` for its loop variable,
        # shadowing this function, and fell off the end (returning ``None``)
        # when nothing matched. An explicit boolean is returned instead.
        return any(value == supported for supported in version_config)
    except Exception:
        # Best-effort, as in the original: any failure while comparing counts
        # as "not a supported version". Narrowed from a bare ``except`` so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def validate_country_code(value, validation_conf=None):
    """Check whether *value* is a country code known to ``pycountry``.

    The value is looked up, in order, as an ``alpha_2`` and an ``alpha_3``
    code in ``pycountry.countries`` and as an ``alpha_4`` code in
    ``pycountry.historic_countries``.

    Args:
        value: Cell content to check. ``None`` (an empty cell) is accepted.
        validation_conf: Unused; kept so the signature matches the other
            cell validators.

    Returns:
        bool: ``True`` when *value* is ``None`` or any lookup finds an entry;
        ``False`` otherwise.
    """
    if value is None:
        return True
    try:
        # The original returned ``None`` implicitly when no lookup matched;
        # the result is now always a real boolean.
        return bool(
            pycountry.countries.get(alpha_2=value)
            or pycountry.countries.get(alpha_3=value)
            or pycountry.historic_countries.get(alpha_4=value)
        )
    except Exception:
        # NOTE(review): depending on the pycountry release, a failed or
        # non-string lookup may raise instead of returning None -- confirm.
        # Either way the code is treated as invalid. Narrowed from a bare
        # ``except`` so KeyboardInterrupt/SystemExit are no longer swallowed.
        return False
|
||||||
|
|
||||||
|
|
||||||
def validate_excel(fhand, configuration):
|
def validate_excel(fhand, configuration):
|
||||||
validation_conf = configuration['sheet_schema']
|
validation_conf = configuration['sheet_schema']
|
||||||
@ -185,11 +209,14 @@ def validate_row(row, validation_steps, in_memory_sheets):
|
|||||||
kind = validation_step[TYPE]
|
kind = validation_step[TYPE]
|
||||||
error_code = validation_step[ERROR_CODE]
|
error_code = validation_step[ERROR_CODE]
|
||||||
if kind == NAGOYA:
|
if kind == NAGOYA:
|
||||||
if not is_valid_nagoya(row, in_memory_sheets):
|
if not is_valid_nagoya_v12052023(row, in_memory_sheets):
|
||||||
return error_code
|
return error_code
|
||||||
elif kind == BIBLIO:
|
elif kind == BIBLIO:
|
||||||
if not is_valid_pub(row):
|
if not is_valid_pub(row):
|
||||||
return error_code
|
return error_code
|
||||||
|
elif kind == NAGOYA1:
|
||||||
|
if not is_valid_nago(row):
|
||||||
|
return error_code
|
||||||
else:
|
else:
|
||||||
msg = f'{kind} is not a recognized row validation type method'
|
msg = f'{kind} is not a recognized row validation type method'
|
||||||
raise NotImplementedError(msg)
|
raise NotImplementedError(msg)
|
||||||
@ -207,49 +234,70 @@ def validate_cell(value, validation_steps, crossrefs, shown_values, label):
|
|||||||
|
|
||||||
if error_code is not None:
|
if error_code is not None:
|
||||||
return error_code
|
return error_code
|
||||||
|
|
||||||
|
|
||||||
def is_valid_pub(row):
|
def is_valid_pub(row):
|
||||||
|
pub_id = row.get('ID', None)
|
||||||
|
pub_pmid = row.get('PMID', None)
|
||||||
|
pub_doi = row.get('DOI', None)
|
||||||
title = row.get('Title', None)
|
title = row.get('Title', None)
|
||||||
full_reference = row.get('Full reference', None)
|
full_reference = row.get('Full reference', None)
|
||||||
authors = row.get('Authors', None)
|
authors = row.get('Authors', None)
|
||||||
journal = row.get('Journal', None)
|
journal = row.get('Journal', None)
|
||||||
year = row.get('Year', None)
|
year = row.get('Year', None)
|
||||||
volumen = row.get('Volumen', None)
|
volumen = row.get('Volume', None)
|
||||||
first_page = row.get('First page', None)
|
first_page = row.get('First page', None)
|
||||||
book_title = row.get('Book title', None)
|
book_title = row.get('Book title', None)
|
||||||
editors = row.get('Editors', None)
|
editors = row.get('Editors', None)
|
||||||
publishers = row.get('Publishers', None)
|
publishers = row.get('Publishers', None)
|
||||||
|
|
||||||
if full_reference:
|
if (pub_id != None and pub_doi != None) or (pub_id != None and pub_pmid != None) or (pub_id != None and full_reference != None) or (pub_id != None and authors != None and title != None and journal != None and year != None and volumen != None and first_page != None) :
|
||||||
return True
|
return True
|
||||||
is_journal = bool(title)
|
is_journal = bool(title)
|
||||||
|
|
||||||
if (is_journal and (not authors or not journal or not not year or
|
# if (is_journal and (not authors or not journal or not not year or
|
||||||
not volumen or not first_page)):
|
# not volumen or not first_page)):
|
||||||
return False
|
# return False
|
||||||
if (not is_journal and (not authors or not year or
|
#if (not is_journal and (not authors or not year or
|
||||||
not editors or not publishers or not book_title)):
|
# not editors or not publishers or not book_title)):
|
||||||
return False
|
# return False
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def is_valid_nago(row):
|
||||||
|
if not row:
|
||||||
|
return True
|
||||||
|
status = row.get("status", None)
|
||||||
|
type = row.get("type", None)
|
||||||
|
regex = r'^[a-zA-Z\s.\'-]+$'
|
||||||
|
|
||||||
|
if status != None and type != None:
|
||||||
|
if (re.match(regex, status) and type==1):
|
||||||
|
return False
|
||||||
|
if (type == 2 and status is None):
|
||||||
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def parsee_mirri_excel(row, in_memory_sheets, version=""):
    """Dispatch the Nagoya row check to the implementation for *version*.

    Args:
        row: Row passed through to ``is_valid_nagoya_v12052023``.
        in_memory_sheets: Passed through to ``is_valid_nagoya_v12052023``.
        version: Specification identifier; only ``"12052023"`` is handled.

    Returns:
        Whatever ``is_valid_nagoya_v12052023(row, in_memory_sheets)`` returns.

    Raises:
        NotImplementedError: If *version* is anything other than ``"12052023"``
            (including the default empty string).
    """
    if version == "12052023":
        return is_valid_nagoya_v12052023(row, in_memory_sheets)
    # The original message read "Only version is implemented" and never said
    # which version the caller was expected to pass.
    raise NotImplementedError("Only version 12052023 is implemented")
|
||||||
|
|
||||||
def is_valid_nagoya(row, in_memory_sheets): # sourcery skip: return-identity
|
def is_valid_nagoya_v12052023(row, in_memory_sheets): # sourcery skip: return-identity
|
||||||
location_index = row.get('Geographic origin', None)
|
location_index = row.get('geographicOrigin', None)
|
||||||
if location_index is None:
|
if location_index is None:
|
||||||
country = None
|
country = None
|
||||||
else:
|
else:
|
||||||
geo_origin = in_memory_sheets[LOCATIONS].get(location_index, {})
|
geo_origin = in_memory_sheets[LOCATIONS].get(location_index, {})
|
||||||
country = geo_origin.get('Country', None)
|
country = geo_origin.get('Country', None)
|
||||||
|
|
||||||
_date = row.get("Date of collection", None)
|
_date = row.get("collectionDate", None)
|
||||||
if _date is None:
|
if _date is None:
|
||||||
_date = row.get("Date of isolation", None)
|
_date = row.get("isolationDate", None)
|
||||||
if _date is None:
|
if _date is None:
|
||||||
_date = row.get("Date of deposit", None)
|
_date = row.get("depositDate", None)
|
||||||
if _date is None:
|
if _date is None:
|
||||||
_date = row.get("Date of inclusion in the catalogue", None)
|
_date = row.get("accessionDate", None)
|
||||||
if _date is not None:
|
if _date is not None:
|
||||||
year = _date.year if isinstance(_date, datetime) else int(str(_date)[:4])
|
year = _date.year if isinstance(_date, datetime) else int(str(_date)[:4])
|
||||||
else:
|
else:
|
||||||
@ -258,9 +306,9 @@ def is_valid_nagoya(row, in_memory_sheets): # sourcery skip: return-identity
|
|||||||
if year is not None and year >= 2014 and country is None:
|
if year is not None and year >= 2014 and country is None:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def is_valid_regex(value, validation_conf):
|
def is_valid_regex(value, validation_conf):
|
||||||
if value is None:
|
if value is None:
|
||||||
return True
|
return True
|
||||||
@ -310,7 +358,9 @@ def is_valid_choices(value, validation_conf):
|
|||||||
values = [v.strip() for v in str(value).split(separator)]
|
values = [v.strip() for v in str(value).split(separator)]
|
||||||
else:
|
else:
|
||||||
values = [str(value).strip()]
|
values = [str(value).strip()]
|
||||||
|
sorted_values = sorted(values)
|
||||||
|
if sorted_values != values:
|
||||||
|
return False
|
||||||
return all(value in choices for value in values)
|
return all(value in choices for value in values)
|
||||||
|
|
||||||
|
|
||||||
@ -352,47 +402,145 @@ def is_valid_date(value, validation_conf):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def is_valid_coords(value, validation_conf=None):
|
def is_valid_dominio(value, validation_conf=None):
|
||||||
# sourcery skip: return-identity
|
|
||||||
if value is None:
|
if value is None:
|
||||||
return True
|
return True
|
||||||
try:
|
try:
|
||||||
items = [i.strip() for i in value.split(";")]
|
items = [i.strip() for i in value.split(";")]
|
||||||
latitude = float(items[0])
|
if len(items) >1:
|
||||||
longitude = float(items[1])
|
for i in range(0, len(items),2):
|
||||||
if len(items) > 2:
|
nameSite = str(items[i])
|
||||||
precision = float(items[2])
|
urlSite = str(items[i+1])
|
||||||
if latitude < -90 or latitude > 90:
|
dominio = urlSite.split(".")[-2]
|
||||||
return False
|
if nameSite.lower() != dominio:
|
||||||
if longitude < -180 or longitude > 180:
|
return False
|
||||||
return False
|
|
||||||
return True
|
return True
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def is_valid_title(value, validation_conf=None):
    """Check the title halves of a ``title;url;title;url...`` cell.

    The cell is split on ``';'`` and each piece is stripped. A single piece is
    accepted as-is. With more pieces, the ones at even positions are treated
    as titles: a title must not be empty and must not itself look like an
    ``http(s)://`` URL.

    Args:
        value: Cell content to check. ``None`` (an empty cell) is accepted.
        validation_conf: Unused; kept so the signature matches the other
            cell validators.

    Returns:
        bool: ``True`` when the value is acceptable, ``False`` otherwise
        (including non-string values and a title without a following URL).
    """
    if value is None:
        return True
    if not isinstance(value, str):
        # The original reached the same answer through ``value.split``
        # raising inside a bare ``except``.
        return False

    items = [item.strip() for item in value.split(";")]
    if len(items) == 1:
        return True
    if len(items) % 2:
        # A trailing title with no URL after it: the original hit an
        # IndexError on ``items[i + 1]`` and returned False.
        return False

    # Same language as the original pattern, but without the nested
    # quantifier ``([...]*)*`` whose backtracking is exponential on a long
    # URL-like title that ends in a character outside the class.
    url_like = re.compile(r'^(http|https):\/\/[a-z0-9\-\.]+\.[a-z]{2,}[/a-z0-9\-\.]*$')
    # The original also tested ``isinstance(title, int)``, which can never be
    # true for pieces produced by ``str.split``; that dead check is dropped.
    return not any(title == '' or url_like.match(title) for title in items[::2])
|
||||||
|
|
||||||
|
def is_valid_url_title(value, validation_conf=None):
|
||||||
|
if value is None:
|
||||||
|
return True
|
||||||
|
try:
|
||||||
|
items = [i.strip() for i in value.split(";")]
|
||||||
|
if len(items) ==1:
|
||||||
|
urlSite = str(items[0])
|
||||||
|
response = requests.head(urlSite)
|
||||||
|
if response.status_code != 200:
|
||||||
|
return False
|
||||||
|
|
||||||
|
else:
|
||||||
|
items = [i.strip() for i in value.split(";")]
|
||||||
|
for i in range(0, len(items),2):
|
||||||
|
nameSite = (items[i])
|
||||||
|
urlSite = str(items[i+1])
|
||||||
|
response = requests.head(urlSite)
|
||||||
|
if response.status_code != 200:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
return True
|
||||||
|
except:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_url_dominio(value, validation_conf=None):
|
||||||
|
if value is None:
|
||||||
|
return True
|
||||||
|
try:
|
||||||
|
items = [i.strip() for i in value.split(";")]
|
||||||
|
for i in range(0, len(items),2):
|
||||||
|
nameSite = str(items[i])
|
||||||
|
urlSite = str(items[i+1])
|
||||||
|
response = requests.head(urlSite)
|
||||||
|
if response.status_code != 200:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
except:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_just_url(value, validation_conf=None):
|
||||||
|
if value is None:
|
||||||
|
return True
|
||||||
|
try:
|
||||||
|
items = [i.strip() for i in value.split(";")]
|
||||||
|
for i in items:
|
||||||
|
nameSite = str(items[0])
|
||||||
|
response = requests.head(i)
|
||||||
|
if response.status_code != 200:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
except:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_history(value, validation_conf=None):
|
||||||
|
if value is None:
|
||||||
|
return True
|
||||||
|
try:
|
||||||
|
items = [i.strip() for i in value.split("<")]
|
||||||
|
for i in items:
|
||||||
|
regex1 = r'^[a-zA-Z0-9 &,;.:''-]+,?\s*((19|20)\d{2})'
|
||||||
|
regex2 = r'^[a-zA-Z0-9 &,;.:''-]+,?\s*[a-zA-Z0-9 &,;.''-] (19|20)\d{2}\s\([a-zA-Z0-9 &,;.''-:]+\)'
|
||||||
|
regex3 = r'^[a-zA-Z0-9 &,;.:''-]+\,?\s*[a-zA-Z0-9 &,;.''-]'
|
||||||
|
regex4 = r'^[a-zA-Z0-9 &,;.''-]+,?\s*(19|20)\d{2}\s\([a-zA-Z0-9 .''-,;&:]+\)'
|
||||||
|
regex5 = r'^[a-zA-Z0-9 &,;.:''-]+,?\s*\([a-zA-Z0-9 &,;.''-:]+\) (19|20)\d{2}'
|
||||||
|
if re.match(regex1, i):
|
||||||
|
return True
|
||||||
|
elif re.match(regex2, i):
|
||||||
|
return True
|
||||||
|
elif re.match(regex3, i):
|
||||||
|
return True
|
||||||
|
elif re.match(regex4, i):
|
||||||
|
return True
|
||||||
|
elif re.match(regex5, i):
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
except:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_coords(value, validation_conf=None):
|
||||||
|
if value is None:
|
||||||
|
return True
|
||||||
|
try:
|
||||||
|
|
||||||
|
regex1 = r'^-?(90(\.0+)?|[1-8]?\d(\.\d+)?)(\s*;\s*-?(180(\.0+)?|((1[0-7]\d)|(\d{1,2}))(\.\d+)?))*$'
|
||||||
|
regex2 = r'^-?(90(\.0+)?|[1-8]?\d(\.\d+)?)\s*;\s*-?(180(\.0+)?|((1[0-7]\d)|(\d{1,2}))(\.\d+)?)\s*;\s*(\d+\.\d+|\?)\s*;\s*(\d+\.\d+|\?)$|^(\d+\.\d+|\?)$|^\s*;\s*$'
|
||||||
|
|
||||||
|
if not re.match(regex1, value) and not re.match(regex2, value):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
except:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def is_valid_missing(value, validation_conf=None):
|
def is_valid_missing(value, validation_conf=None):
|
||||||
return value is not None
|
return value is not None
|
||||||
|
|
||||||
|
|
||||||
def is_valid_number(value, validation_conf):
|
|
||||||
if value is None:
|
|
||||||
return True
|
|
||||||
try:
|
|
||||||
value = float(value)
|
|
||||||
except TypeError:
|
|
||||||
return False
|
|
||||||
except ValueError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
_max = validation_conf.get('max', None)
|
|
||||||
_min = validation_conf.get('min', None)
|
|
||||||
if (_max is not None and value > _max) or (_min is not None and value < _min):
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def is_valid_taxon(value, validation_conf=None):
|
def is_valid_taxon(value, validation_conf=None):
|
||||||
multiple = validation_conf.get(MULTIPLE, False)
|
multiple = validation_conf.get(MULTIPLE, False)
|
||||||
separator = validation_conf.get(SEPARATOR, ';')
|
separator = validation_conf.get(SEPARATOR, ';')
|
||||||
@ -429,6 +577,8 @@ def _is_valid_taxon(value):
|
|||||||
|
|
||||||
|
|
||||||
def is_valid_unique(value, validation_conf):
|
def is_valid_unique(value, validation_conf):
|
||||||
|
if not value:
|
||||||
|
return True
|
||||||
label = validation_conf['label']
|
label = validation_conf['label']
|
||||||
shown_values = validation_conf['shown_values']
|
shown_values = validation_conf['shown_values']
|
||||||
if label not in shown_values:
|
if label not in shown_values:
|
||||||
@ -444,7 +594,6 @@ def is_valid_unique(value, validation_conf):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def is_valid_file(path):
|
def is_valid_file(path):
|
||||||
try:
|
try:
|
||||||
with path.open("rb") as fhand:
|
with path.open("rb") as fhand:
|
||||||
@ -464,8 +613,15 @@ VALIDATION_FUNCTIONS = {
|
|||||||
CROSSREF: is_valid_crossrefs,
|
CROSSREF: is_valid_crossrefs,
|
||||||
DATE: is_valid_date,
|
DATE: is_valid_date,
|
||||||
COORDINATES: is_valid_coords,
|
COORDINATES: is_valid_coords,
|
||||||
NUMBER: is_valid_number,
|
|
||||||
TAXON: is_valid_taxon,
|
TAXON: is_valid_taxon,
|
||||||
|
TITLE: is_valid_title,
|
||||||
|
DOMINIO: is_valid_dominio,
|
||||||
|
URL_TITLE: is_valid_url_title,
|
||||||
|
URL_DOMINIO: is_valid_url_dominio,
|
||||||
|
JUST_URL: is_valid_just_url,
|
||||||
|
ISO: validate_country_code,
|
||||||
|
HISTORY: is_valid_history,
|
||||||
|
VERSION: version,
|
||||||
UNIQUE: is_valid_unique}
|
UNIQUE: is_valid_unique}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -16,9 +16,20 @@ MATCH = 'match'
|
|||||||
VALUES = 'values'
|
VALUES = 'values'
|
||||||
DATE = 'date'
|
DATE = 'date'
|
||||||
COORDINATES = 'coord'
|
COORDINATES = 'coord'
|
||||||
|
COORDINATES1 = 'coord1'
|
||||||
NUMBER = 'number'
|
NUMBER = 'number'
|
||||||
TAXON = 'taxon'
|
TAXON = 'taxon'
|
||||||
UNIQUE = 'unique'
|
UNIQUE = 'unique'
|
||||||
ROW_VALIDATION = 'row_validation'
|
ROW_VALIDATION = 'row_validation'
|
||||||
NAGOYA = 'nagoya'
|
NAGOYA = 'nagoya'
|
||||||
BIBLIO = 'bibliography'
|
BIBLIO = 'bibliography'
|
||||||
|
DOMINIO= 'is_valid_dominio'
|
||||||
|
TITLE= 'is_valid_title'
|
||||||
|
URL_DOMINIO = 'urll_valid_dominio'
|
||||||
|
URL_TITLE= 'is_valid_url_title'
|
||||||
|
ISO = 'validate_country_code'
|
||||||
|
JUST_URL= 'is_valid_just_url'
|
||||||
|
HISTORY= 'is_valid_history'
|
||||||
|
MEU='is_valid_crossrefs_meu'
|
||||||
|
NAGOYA1 = 'nayoga1'
|
||||||
|
VERSION = 'version'
|
||||||
25
mirri/validation/validate_v5.py
Normal file
25
mirri/validation/validate_v5.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import pandas as pd
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
import warnings
|
||||||
|
warnings.simplefilter("ignore")
|
||||||
|
from mirri.validation.excel_validator import validate_mirri_excel
|
||||||
|
|
||||||
|
def main():
|
||||||
|
path = Path(sys.argv[1])
|
||||||
|
version = str(sys.argv[2])
|
||||||
|
try:
|
||||||
|
|
||||||
|
error_log = validate_mirri_excel(path.open("rb"), version=version)
|
||||||
|
|
||||||
|
except NotImplementedError as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
|
for errors in error_log.get_errors().values():
|
||||||
|
for error in errors:
|
||||||
|
print(error.pk, error.message, error.code)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@ -1,10 +1,13 @@
|
|||||||
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
|
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
|
||||||
ERROR_CODE, FIELD, MANDATORY, MATCH,
|
ERROR_CODE, FIELD, MANDATORY, MATCH,
|
||||||
MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
|
MISSING, MULTIPLE, NAGOYA, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
|
||||||
UNIQUE,
|
UNIQUE,VERSION,
|
||||||
VALIDATION, VALUES, BIBLIO)
|
VALIDATION, VALUES, BIBLIO, DOMINIO, URL_DOMINIO,ISO, JUST_URL, URL_TITLE, TITLE, HISTORY,NAGOYA1)
|
||||||
from mirri.settings import (ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
|
from mirri.settings import (ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
|
||||||
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET, MARKERS)
|
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET, MARKERS, CONTROL_SHEET)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# GEOGRAPHIC_ORIGIN
|
# GEOGRAPHIC_ORIGIN
|
||||||
# SEXUAL_STATE_SHEET,
|
# SEXUAL_STATE_SHEET,
|
||||||
# RESOURCE_TYPES_VALUES,
|
# RESOURCE_TYPES_VALUES,
|
||||||
@ -12,9 +15,12 @@ from mirri.settings import (ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
|
|||||||
# PLOIDY_SHEET)
|
# PLOIDY_SHEET)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
STRAIN_FIELDS = [
|
STRAIN_FIELDS = [
|
||||||
|
|
||||||
{
|
{
|
||||||
FIELD: "Accession number",
|
FIELD: "accessionNumber",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: 'STD01'},
|
{TYPE: MANDATORY, ERROR_CODE: 'STD01'},
|
||||||
{TYPE: UNIQUE, ERROR_CODE: 'STD03'},
|
{TYPE: UNIQUE, ERROR_CODE: 'STD03'},
|
||||||
@ -23,16 +29,24 @@ STRAIN_FIELDS = [
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Restrictions on use",
|
FIELD: "useRestrictions",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "STD05"},
|
{TYPE: MANDATORY, ERROR_CODE: "STD05"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "STD06"},
|
{TYPE: MISSING, ERROR_CODE: "STD06"},
|
||||||
{TYPE: CHOICES, VALUES: ["1", "2", "3"],
|
{TYPE: CHOICES, VALUES: ["1", "2", "3"],
|
||||||
MULTIPLE: False, ERROR_CODE: "STD07"}
|
MULTIPLE: False, ERROR_CODE: "STD07"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
FIELD: "mirriAccessionNumber",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: UNIQUE, ERROR_CODE: 'STD51'},
|
||||||
|
{TYPE: REGEXP, MATCH: "^MIRRI[0-9]{7}$", ERROR_CODE: "STD52"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
FIELD: "Nagoya protocol restrictions and compliance conditions",
|
FIELD: "nagoyaConditions",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "STD08"},
|
{TYPE: MANDATORY, ERROR_CODE: "STD08"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "STD09"},
|
{TYPE: MISSING, ERROR_CODE: "STD09"},
|
||||||
@ -41,29 +55,53 @@ STRAIN_FIELDS = [
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "ABS related files",
|
FIELD: "absFile",
|
||||||
VALIDATION: [],
|
VALIDATION: [
|
||||||
|
{TYPE: TITLE, ERROR_CODE: "STD59"},
|
||||||
|
{TYPE: URL_TITLE, ERROR_CODE: "STD60",
|
||||||
|
MULTIPLE: True, SEPARATOR: ";"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
FIELD: "siteLinks",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: DOMINIO, ERROR_CODE: "STD53",
|
||||||
|
MULTIPLE: False, SEPARATOR: ";"},
|
||||||
|
{TYPE: URL_DOMINIO, ERROR_CODE: "STD56",
|
||||||
|
MULTIPLE: False, SEPARATOR: ";"},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "MTA file",
|
FIELD: "mtaFile",
|
||||||
VALIDATION: [],
|
VALIDATION: [
|
||||||
|
{TYPE: JUST_URL, ERROR_CODE: "STD58",
|
||||||
|
MULTIPLE: True, SEPARATOR: ";"},
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Other culture collection numbers",
|
FIELD: "otherCollectionNumbers",
|
||||||
# VALIDATION: [
|
VALIDATION: [
|
||||||
# {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD07",
|
{TYPE: REGEXP, MATCH: "([^ ]* [^ ]*)(; [^ ]* [^ ]*)*$", ERROR_CODE: "STD63",
|
||||||
# MULTIPLE: True, SEPARATOR: ";"}
|
MULTIPLE: True, SEPARATOR: ';'},
|
||||||
# ]
|
#{TYPE: CROSSREF, CROSSREF_NAME: "Strains", ERROR_CODE: "STD64"},
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Strain from a registered collection",
|
FIELD: "registeredCollection",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||||
ERROR_CODE: "STD11"}
|
ERROR_CODE: "STD11"}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FIELD: "type",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD64"},
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Risk Group",
|
FIELD: "riskGroup",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "STD12"},
|
{TYPE: MANDATORY, ERROR_CODE: "STD12"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "STD13"},
|
{TYPE: MISSING, ERROR_CODE: "STD13"},
|
||||||
@ -72,33 +110,41 @@ STRAIN_FIELDS = [
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Dual use",
|
FIELD: "dualUse",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||||
ERROR_CODE: "STD15"}
|
ERROR_CODE: "STD15"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Quarantine in Europe",
|
FIELD: "euQuarantine",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||||
ERROR_CODE: "STD16"}
|
ERROR_CODE: "STD16"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
FIELD: "axenicCulture",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: CHOICES, VALUES: ["Axenic", "Not axenic"],
|
||||||
|
ERROR_CODE: "STD50"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
FIELD: "Organism type",
|
FIELD: "organismType",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "STD17"},
|
{TYPE: MANDATORY, ERROR_CODE: "STD17"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "STD18"},
|
{TYPE: MISSING, ERROR_CODE: "STD18"},
|
||||||
{TYPE: CHOICES, VALUES: ["Algae", "Archaea", "Bacteria",
|
{TYPE: CHOICES, VALUES: ["Algae", "Archaea", "Bacteria",
|
||||||
"Cyanobacteria", "Filamentous Fungi",
|
"Cyanobacteria", "Filamentous Fungi", "Filamentous fungi",
|
||||||
"Phage", "Plasmid", "Virus", "Yeast",
|
"Yeast", "Microalgae",
|
||||||
"1", "2", "3", "4", "5", "6", "7", "8", "9"],
|
"1", "2", "3", "4", "5", "6", "7"],
|
||||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD19"}
|
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD19"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Taxon name",
|
FIELD: "speciesName",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "STD20"},
|
{TYPE: MANDATORY, ERROR_CODE: "STD20"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "STD21"},
|
{TYPE: MISSING, ERROR_CODE: "STD21"},
|
||||||
@ -107,73 +153,69 @@ STRAIN_FIELDS = [
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Infrasubspecific names",
|
FIELD: "infrasubspecificNames",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Comment on taxonomy",
|
FIELD: "taxonomyComments",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Interspecific hybrid",
|
FIELD: "hybrid",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||||
ERROR_CODE: "STD23"}
|
ERROR_CODE: "STD23"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Status",
|
FIELD: "status",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: REGEXP, MATCH: "^(type of|neotype of|holotype of |epitype of) ([a-zA-Z .'-]+)$", ERROR_CODE: "STD65"},
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "History of deposit",
|
FIELD: "depositHistory",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
# {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD24", # modify the regex
|
{TYPE: HISTORY, ERROR_CODE: 'STD24'},
|
||||||
# MULTIPLE: True, SEPARATOR: ";"}
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Depositor"
|
FIELD: "depositor",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Date of deposit",
|
FIELD: "depositDate",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: DATE, ERROR_CODE: "STD25"},
|
{TYPE: DATE, ERROR_CODE: "STD25"},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Date of inclusion in the catalogue",
|
FIELD: "accessionDate",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: DATE, ERROR_CODE: "STD26"},
|
{TYPE: DATE, ERROR_CODE: "STD26"},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Collected by",
|
FIELD: "collector",
|
||||||
|
VALIDATION: []
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
FIELD: "substrate",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Date of collection",
|
FIELD: "temperatureGrowthRange",
|
||||||
VALIDATION: [
|
|
||||||
{TYPE: DATE, ERROR_CODE: "STD27"},
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
FIELD: "Isolated by",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
FIELD: "Date of isolation",
|
|
||||||
VALIDATION: [
|
|
||||||
{TYPE: DATE, ERROR_CODE: "STD28"},
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
FIELD: "Substrate/host of isolation",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
FIELD: "Tested temperature growth range",
|
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: REGEXP, "match": r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
|
{TYPE: REGEXP, "match": r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?',
|
||||||
ERROR_CODE: "STD29", MULTIPLE: True, SEPARATOR: ";"}
|
ERROR_CODE: "STD29", MULTIPLE: True, SEPARATOR: ";"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Recommended growth temperature",
|
FIELD: "recommendedTemperature",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "STD30"},
|
{TYPE: MANDATORY, ERROR_CODE: "STD30"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "STD31"},
|
{TYPE: MISSING, ERROR_CODE: "STD31"},
|
||||||
@ -182,17 +224,9 @@ STRAIN_FIELDS = [
|
|||||||
MULTIPLE: True, SEPARATOR: ";"}
|
MULTIPLE: True, SEPARATOR: ";"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
FIELD: "Recommended medium for growth",
|
FIELD: "supplyForms",
|
||||||
VALIDATION: [
|
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "STD33"},
|
|
||||||
{TYPE: MISSING, ERROR_CODE: "STD34"},
|
|
||||||
{TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
|
|
||||||
MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
FIELD: "Form of supply",
|
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "STD36"},
|
{TYPE: MANDATORY, ERROR_CODE: "STD36"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "STD37"},
|
{TYPE: MISSING, ERROR_CODE: "STD37"},
|
||||||
@ -202,52 +236,70 @@ STRAIN_FIELDS = [
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Other denomination",
|
FIELD: "otherDenomination",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Coordinates of geographic origin",
|
FIELD: "geographicCoordinates",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: COORDINATES, ERROR_CODE: "STD39"},
|
{TYPE: COORDINATES, ERROR_CODE: "STD39"},
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
FIELD: "Altitude of geographic origin",
|
|
||||||
VALIDATION: [
|
|
||||||
{TYPE: NUMBER, 'max': 8000, 'min': -200, ERROR_CODE: "STD40"},
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
# value can be in the cell or in another sheet. Don't configure this
|
# value can be in the cell or in another sheet. Don't configure this
|
||||||
FIELD: "Geographic origin",
|
FIELD: "geographicOrigin",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: MANDATORY, ERROR_CODE: "STD68"},
|
||||||
|
{TYPE: MISSING, ERROR_CODE: "STD69"},
|
||||||
|
{TYPE: CROSSREF, CROSSREF_NAME: "Geographic origin", ERROR_CODE: "STD46"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
FIELD: "isolationHabitat",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Isolation habitat",
|
FIELD: "ontobiotopeTerms",
|
||||||
},
|
|
||||||
{
|
|
||||||
FIELD: "Ontobiotope term for the isolation habitat",
|
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CROSSREF, CROSSREF_NAME: "Ontobiotope",
|
{TYPE: CROSSREF, CROSSREF_NAME: "Ontobiotope",
|
||||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD41"}
|
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD41"}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FIELD: "qps",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||||
|
ERROR_CODE: "STD49"}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "GMO",
|
FIELD: "gmo",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CHOICES, VALUES: ["1", "2"],
|
{TYPE: CHOICES, VALUES: ["1", "2"],
|
||||||
ERROR_CODE: "STD42"}
|
ERROR_CODE: "STD42"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "GMO construction information",
|
FIELD: "gmoConstruction",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Mutant information",
|
FIELD: "mutant",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Genotype",
|
FIELD: "genotype",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Sexual state",
|
FIELD: "Plant pathogenicity code",
|
||||||
|
VALIDATION: []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FIELD: "sexualState",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CROSSREF, CROSSREF_NAME: SEXUAL_STATE_SHEET,
|
{TYPE: CROSSREF, CROSSREF_NAME: SEXUAL_STATE_SHEET,
|
||||||
ERROR_CODE: "STD43"}
|
ERROR_CODE: "STD43"}
|
||||||
@ -258,46 +310,78 @@ STRAIN_FIELDS = [
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Ploidy",
|
FIELD: "ploidy",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CHOICES, VALUES: ["0", "1", "2", "3", "4", "9"],
|
{TYPE: CHOICES, VALUES: ["1", "2", "3", "4", "5", "9"],
|
||||||
ERROR_CODE: "STD44"}
|
ERROR_CODE: "STD44"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Plasmids",
|
FIELD: "plasmids",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Plasmids collections fields",
|
FIELD: "plasmidCollections",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: REGEXP, MATCH: "([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\)(\s*;([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\))*$",
|
||||||
|
ERROR_CODE: "STD62"}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# value can be in the cell or in another sheet. Don't configure this
|
# value can be in the cell or in another sheet. Don't configure this
|
||||||
FIELD: "Literature",
|
FIELD: "identificationLiterature",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: CROSSREF, CROSSREF_NAME: LITERATURE_SHEET,
|
{TYPE: CROSSREF, CROSSREF_NAME: LITERATURE_SHEET,
|
||||||
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD45"}
|
MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD45"}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Plant pathogenicity code",
|
FIELD: "pathogenicity",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Pathogenicity",
|
FIELD: "enzymes",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Enzyme production",
|
FIELD: "metabolites",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Production of metabolites",
|
FIELD: "applications",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Applications",
|
FIELD: "remarks",
|
||||||
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Remarks"
|
FIELD: "sequenceLiterature",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: REGEXP, MATCH: "^\d+(\s*;?\s*\d+)*$", ERROR_CODE: "STD61"},
|
||||||
|
]
|
||||||
|
|
||||||
},
|
},
|
||||||
{
|
|
||||||
FIELD: "Literature linked to the sequence/genome",
|
{
|
||||||
|
FIELD: "recommendedMedium",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: MANDATORY, ERROR_CODE: "STD33"},
|
||||||
|
{TYPE: MISSING, ERROR_CODE: "STD34"},
|
||||||
|
{TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
|
||||||
|
MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
FIELD: "country",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: MANDATORY, ERROR_CODE: "STD54"},
|
||||||
|
{TYPE: MISSING, ERROR_CODE: "STD55"},
|
||||||
|
{TYPE: ISO, ERROR_CODE: "STD57"},
|
||||||
|
#{TYPE: CROSSREF, CROSSREF_NAME: COUNTRY_CODES_SHEET, ERROR_CODE: "STD57"}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
SHEETS_SCHEMA = {
|
SHEETS_SCHEMA = {
|
||||||
@ -317,7 +401,7 @@ SHEETS_SCHEMA = {
|
|||||||
FIELD: "Country",
|
FIELD: "Country",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "GOD03"},
|
{TYPE: MANDATORY, ERROR_CODE: "GOD03"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "GOD04"}
|
{TYPE: MISSING, ERROR_CODE: "GOD04"},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -389,6 +473,7 @@ SHEETS_SCHEMA = {
|
|||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "GID07"},
|
{TYPE: MANDATORY, ERROR_CODE: "GID07"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "GID08"},
|
{TYPE: MISSING, ERROR_CODE: "GID08"},
|
||||||
|
{TYPE: REGEXP, MATCH: "^[A-Z]{2}[0-9]{6}$", ERROR_CODE: "GID11"},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -399,11 +484,9 @@ SHEETS_SCHEMA = {
|
|||||||
},
|
},
|
||||||
STRAINS: {
|
STRAINS: {
|
||||||
"acronym": "STD",
|
"acronym": "STD",
|
||||||
'id_field': 'Accession number',
|
'id_field': 'accessionNumber',
|
||||||
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
|
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
|
||||||
ROW_VALIDATION: [
|
ROW_VALIDATION: [],
|
||||||
{TYPE: NAGOYA, ERROR_CODE: "STD46"},
|
|
||||||
],
|
|
||||||
COLUMNS: STRAIN_FIELDS,
|
COLUMNS: STRAIN_FIELDS,
|
||||||
},
|
},
|
||||||
LITERATURE_SHEET: {
|
LITERATURE_SHEET: {
|
||||||
@ -412,7 +495,7 @@ SHEETS_SCHEMA = {
|
|||||||
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
|
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
|
||||||
ROW_VALIDATION: [
|
ROW_VALIDATION: [
|
||||||
{TYPE: BIBLIO, ERROR_CODE: 'LID17'}
|
{TYPE: BIBLIO, ERROR_CODE: 'LID17'}
|
||||||
],
|
],
|
||||||
COLUMNS: [
|
COLUMNS: [
|
||||||
{
|
{
|
||||||
FIELD: "ID",
|
FIELD: "ID",
|
||||||
@ -421,6 +504,18 @@ SHEETS_SCHEMA = {
|
|||||||
{TYPE: MISSING, ERROR_CODE: "LID02"},
|
{TYPE: MISSING, ERROR_CODE: "LID02"},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
FIELD: "PMID",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: MANDATORY, ERROR_CODE: "LID18"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FIELD: "DOI",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: MANDATORY, ERROR_CODE: "LID20"},
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Full reference",
|
FIELD: "Full reference",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
@ -465,7 +560,6 @@ SHEETS_SCHEMA = {
|
|||||||
FIELD: "First page",
|
FIELD: "First page",
|
||||||
VALIDATION: [
|
VALIDATION: [
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "LID15"},
|
{TYPE: MANDATORY, ERROR_CODE: "LID15"},
|
||||||
{TYPE: MISSING, ERROR_CODE: "LID16"},
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -504,13 +598,38 @@ SHEETS_SCHEMA = {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
FIELD: "Name",
|
FIELD: "Name",
|
||||||
VALIDATION: [
|
VALIDATION: []
|
||||||
{TYPE: MANDATORY, ERROR_CODE: "OTD03"},
|
|
||||||
{TYPE: MISSING, ERROR_CODE: "OTD04"},
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CONTROL_SHEET: {
|
||||||
|
"acronym": "VRS",
|
||||||
|
"id_field": "Version",
|
||||||
|
VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS09"},
|
||||||
|
COLUMNS: [
|
||||||
|
{
|
||||||
|
FIELD: "Version",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: MANDATORY, ERROR_CODE: "VRS01"},
|
||||||
|
{TYPE: MISSING, ERROR_CODE: "VRS02"},
|
||||||
|
{TYPE: VERSION, ERROR_CODE: "VRS05"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FIELD: "Date",
|
||||||
|
VALIDATION: [
|
||||||
|
{TYPE: MANDATORY, ERROR_CODE: "VRS03"},
|
||||||
|
{TYPE: MISSING, ERROR_CODE: "VRS04"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
|
||||||
MARKERS: {
|
MARKERS: {
|
||||||
"acronym": "MKD",
|
"acronym": "MKD",
|
||||||
"id_field": "Acronym",
|
"id_field": "Acronym",
|
||||||
@ -524,22 +643,31 @@ SHEETS_SCHEMA = {
|
|||||||
VALIDATION: []
|
VALIDATION: []
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
CROSS_REF_CONF = {
|
CROSS_REF_CONF = {
|
||||||
ONTOBIOTOPE: ['ID', 'Name'],
|
ONTOBIOTOPE: ['ID'],
|
||||||
LITERATURE_SHEET: ['ID'],
|
LITERATURE_SHEET: ['ID', 'DOI', 'PMID', 'Full reference', 'Authors', 'Title', 'Journal', 'Year', 'Volume', 'First page'],
|
||||||
LOCATIONS: ['Locality'],
|
LOCATIONS: ['ID', 'Locality'],
|
||||||
GROWTH_MEDIA: ['Acronym'],
|
GROWTH_MEDIA: ['Acronym'],
|
||||||
STRAINS: ["Accession number"],
|
STRAINS: ["accessionNumber"],
|
||||||
SEXUAL_STATE_SHEET: [],
|
SEXUAL_STATE_SHEET: [],
|
||||||
MARKERS: ["Acronym"],
|
MARKERS: ["Acronym"],
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MIRRI_20200601_VALLIDATION_CONF = {
|
MIRRI_12052023_VALLIDATION_CONF = {
|
||||||
'sheet_schema': SHEETS_SCHEMA,
|
'sheet_schema': SHEETS_SCHEMA,
|
||||||
'cross_ref_conf': CROSS_REF_CONF,
|
'cross_ref_conf': CROSS_REF_CONF,
|
||||||
'keep_sheets_in_memory': [
|
'keep_sheets_in_memory': [
|
||||||
{'sheet_name': LOCATIONS, 'indexed_by': 'Locality'}]
|
{'sheet_name': LOCATIONS, 'indexed_by': 'Locality'}]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
version_config = {
|
||||||
|
'5.1.2': MIRRI_12052023_VALLIDATION_CONF,
|
||||||
|
'date': '12/05/2023'
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user