"""Declarative validation configuration for the MIRRI spreadsheet, version 5.1.2.

The module only defines data:

* ``STRAIN_FIELDS`` - per-column validation rules of the strains sheet.
* ``SHEETS_SCHEMA`` - per-sheet metadata (acronym, id field, sheet-level
  validation) together with the column rules of every sheet.
* ``CROSS_REF_CONF`` - for each sheet, the columns listed for cross references.
* ``MIRRI_12052023_VALLIDATION_CONF`` / ``version_config`` - the bundle of the
  above and its registration under the version string and date.
"""
from mirri.validation.tags import (
    CHOICES,
    COLUMNS,
    COORDINATES,
    CROSSREF,
    CROSSREF_NAME,
    DATE,
    ERROR_CODE,
    FIELD,
    MANDATORY,
    MATCH,
    MISSING,
    MULTIPLE,
    NAGOYA,
    REGEXP,
    ROW_VALIDATION,
    SEPARATOR,
    TAXON,
    TYPE,
    UNIQUE,
    VERSION,
    VALIDATION,
    VALUES,
    BIBLIO,
    DOMINIO,
    URL_DOMINIO,
    ISO,
    JUST_URL,
    URL_TITLE,
    TITLE,
    HISTORY,
    NAGOYA1,
)
from mirri.settings import (
    ONTOBIOTOPE,
    LOCATIONS,
    GROWTH_MEDIA,
    GENOMIC_INFO,
    STRAINS,
    LITERATURE_SHEET,
    SEXUAL_STATE_SHEET,
    MARKERS,
    CONTROL_SHEET,
)
# Names that were commented out of the settings import above:
# GEOGRAPHIC_ORIGIN, RESOURCE_TYPES_VALUES, FORM_OF_SUPPLY_SHEET, PLOIDY_SHEET

# Optionally signed decimal number with an optional exponent part. Shared by
# the two temperature columns so both rules stay in sync.
_DECIMAL_NUMBER_REGEXP = r"[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?"

# Column rules of the strains sheet. Each entry names a column (FIELD) and the
# ordered list of checks (VALIDATION) configured for it; an empty list means
# the column is declared but has no checks configured.
STRAIN_FIELDS = [
    {
        FIELD: "accessionNumber",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD01"},
            {TYPE: UNIQUE, ERROR_CODE: "STD03"},
            {TYPE: MISSING, ERROR_CODE: "STD02"},
            {TYPE: REGEXP, MATCH: r"[^ ]* [^ ]*", ERROR_CODE: "STD04"},
        ],
    },
    {
        FIELD: "useRestrictions",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD05"},
            {TYPE: MISSING, ERROR_CODE: "STD06"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3"],
             MULTIPLE: False, ERROR_CODE: "STD07"},
        ],
    },
    {
        FIELD: "mirriAccessionNumber",
        VALIDATION: [
            {TYPE: UNIQUE, ERROR_CODE: "STD51"},
            {TYPE: REGEXP, MATCH: r"^MIRRI[0-9]{7}$", ERROR_CODE: "STD52"},
        ],
    },
    {
        FIELD: "nagoyaConditions",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD08"},
            {TYPE: MISSING, ERROR_CODE: "STD09"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3"],
             MULTIPLE: False, ERROR_CODE: "STD10"},
        ],
    },
    {
        FIELD: "absFile",
        VALIDATION: [
            {TYPE: TITLE, ERROR_CODE: "STD59"},
            {TYPE: URL_TITLE, ERROR_CODE: "STD60",
             MULTIPLE: True, SEPARATOR: ";"},
        ],
    },
    {
        FIELD: "siteLinks",
        VALIDATION: [
            {TYPE: DOMINIO, ERROR_CODE: "STD53",
             MULTIPLE: False, SEPARATOR: ";"},
            {TYPE: URL_DOMINIO, ERROR_CODE: "STD56",
             MULTIPLE: False, SEPARATOR: ";"},
        ],
    },
    {
        FIELD: "mtaFile",
        VALIDATION: [
            {TYPE: JUST_URL, ERROR_CODE: "STD58",
             MULTIPLE: True, SEPARATOR: ";"},
        ],
    },
    {
        FIELD: "otherCollectionNumbers",
        VALIDATION: [
            {TYPE: REGEXP, MATCH: r"([^ ]* [^ ]*)(; [^ ]* [^ ]*)*$",
             ERROR_CODE: "STD63", MULTIPLE: True, SEPARATOR: ";"},
            # Commented out in the original configuration:
            # {TYPE: CROSSREF, CROSSREF_NAME: "Strains", ERROR_CODE: "STD64"},
        ],
    },
    {
        FIELD: "registeredCollection",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD11"},
        ],
    },
    {
        FIELD: "type",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD64"},
        ],
    },
    {
        FIELD: "riskGroup",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD12"},
            {TYPE: MISSING, ERROR_CODE: "STD13"},
            {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"],
             MULTIPLE: False, ERROR_CODE: "STD14"},
        ],
    },
    {
        FIELD: "dualUse",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD15"},
        ],
    },
    {
        FIELD: "euQuarantine",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD16"},
        ],
    },
    {
        FIELD: "axenicCulture",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["Axenic", "Not axenic"],
             ERROR_CODE: "STD50"},
        ],
    },
    {
        FIELD: "organismType",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD17"},
            {TYPE: MISSING, ERROR_CODE: "STD18"},
            # Both capitalisations of "Filamentous fungi" are listed.
            {TYPE: CHOICES,
             VALUES: ["Algae", "Archaea", "Bacteria", "Cyanobacteria",
                      "Filamentous Fungi", "Filamentous fungi", "Yeast",
                      "Microalgae", "1", "2", "3", "4", "5", "6", "7"],
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD19"},
        ],
    },
    {
        FIELD: "speciesName",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD20"},
            {TYPE: MISSING, ERROR_CODE: "STD21"},
            {TYPE: TAXON, ERROR_CODE: "STD22",
             MULTIPLE: True, SEPARATOR: ";"},
        ],
    },
    {FIELD: "infrasubspecificNames", VALIDATION: []},
    {FIELD: "taxonomyComments", VALIDATION: []},
    {
        FIELD: "hybrid",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD23"},
        ],
    },
    {
        FIELD: "status",
        VALIDATION: [
            # The "holotype of" alternative used to end with a space, which
            # together with the separator space after the group demanded two
            # spaces and rejected "holotype of <name>". The name class itself
            # contains a space, so values written with two spaces still match.
            {TYPE: REGEXP,
             MATCH: r"^(type of|neotype of|holotype of|epitype of) "
                    r"([a-zA-Z .'-]+)$",
             ERROR_CODE: "STD65"},
        ],
    },
    {
        FIELD: "depositHistory",
        VALIDATION: [
            {TYPE: HISTORY, ERROR_CODE: "STD24"},
        ],
    },
    {FIELD: "depositor", VALIDATION: []},
    {
        FIELD: "depositDate",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD25"},
        ],
    },
    {
        FIELD: "accessionDate",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD26"},
        ],
    },
    {FIELD: "collector", VALIDATION: []},
    {FIELD: "substrate", VALIDATION: []},
    {
        FIELD: "temperatureGrowthRange",
        VALIDATION: [
            # NOTE(review): this rule uses the literal key "match" where the
            # other REGEXP rules use the MATCH constant - confirm both are the
            # same key in mirri.validation.tags.
            {TYPE: REGEXP, "match": _DECIMAL_NUMBER_REGEXP,
             ERROR_CODE: "STD29", MULTIPLE: True, SEPARATOR: ";"},
        ],
    },
    {
        FIELD: "recommendedTemperature",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD30"},
            {TYPE: MISSING, ERROR_CODE: "STD31"},
            # NOTE(review): literal "match" key, see temperatureGrowthRange.
            {TYPE: REGEXP, "match": _DECIMAL_NUMBER_REGEXP,
             ERROR_CODE: "STD32", MULTIPLE: True, SEPARATOR: ";"},
        ],
    },
    {
        FIELD: "supplyForms",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD36"},
            {TYPE: MISSING, ERROR_CODE: "STD37"},
            {TYPE: CHOICES,
             VALUES: ["Agar", "Cryo", "Dry Ice", "Liquid Culture Medium",
                      "Lyo", "Oil", "Water"],
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD38"},
        ],
    },
    {FIELD: "otherDenomination", VALIDATION: []},
    {
        FIELD: "geographicCoordinates",
        VALIDATION: [
            {TYPE: COORDINATES, ERROR_CODE: "STD39"},
        ],
    },
    {
        # value can be in the cell or in another sheet. Don't configure this
        FIELD: "geographicOrigin",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD68"},
            {TYPE: MISSING, ERROR_CODE: "STD69"},
            {TYPE: CROSSREF, CROSSREF_NAME: "Geographic origin",
             ERROR_CODE: "STD46"},
        ],
    },
    {FIELD: "isolationHabitat", VALIDATION: []},
    {
        FIELD: "ontobiotopeTerms",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: "Ontobiotope",
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD41"},
        ],
    },
    {
        FIELD: "qps",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD49"},
        ],
    },
    {
        FIELD: "gmo",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD42"},
        ],
    },
    {FIELD: "gmoConstruction", VALIDATION: []},
    {FIELD: "mutant", VALIDATION: []},
    {FIELD: "genotype", VALIDATION: []},
    {FIELD: "Plant pathogenicity code", VALIDATION: []},
    {
        FIELD: "sexualState",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: SEXUAL_STATE_SHEET,
             ERROR_CODE: "STD43"},
            # Commented out in the original configuration:
            # {TYPE: CHOICES, VALUES: ["Mata", "Matalpha", "Mata/Matalpha",
            #  "Matb", "Mata/Matb", "MTLa", "MTLalpha", "MTLa/MTLalpha",
            #  "MAT1-1", "MAT1-2", "MAT1", "MAT2", "MT+", "MT-"],
            #  ERROR_CODE: "STD43"}
        ],
    },
    {
        FIELD: "ploidy",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2", "3", "4", "5", "9"],
             ERROR_CODE: "STD44"},
        ],
    },
    {FIELD: "plasmids", VALIDATION: []},
    {
        FIELD: "plasmidCollections",
        VALIDATION: [
            # Raw string: the pattern text is unchanged, but "\(", "\d" and
            # "\s" are no longer invalid escapes of a normal string literal.
            {TYPE: REGEXP,
             MATCH: r"([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\)"
                    r"(\s*;([a-zA-Z .'-]+)\(([a-zA-Z .'-]+) (\d+)\))*$",
             ERROR_CODE: "STD62"},
        ],
    },
    {
        # value can be in the cell or in another sheet. Don't configure this
        FIELD: "identificationLiterature",
        VALIDATION: [
            {TYPE: CROSSREF, CROSSREF_NAME: LITERATURE_SHEET,
             MULTIPLE: True, SEPARATOR: ";", ERROR_CODE: "STD45"},
        ],
    },
    {FIELD: "pathogenicity", VALIDATION: []},
    {FIELD: "enzymes", VALIDATION: []},
    {FIELD: "metabolites", VALIDATION: []},
    {FIELD: "applications", VALIDATION: []},
    {FIELD: "remarks", VALIDATION: []},
    {
        FIELD: "sequenceLiterature",
        VALIDATION: [
            # Raw string for the same reason as plasmidCollections.
            {TYPE: REGEXP, MATCH: r"^\d+(\s*;?\s*\d+)*$",
             ERROR_CODE: "STD61"},
        ],
    },
    {
        FIELD: "recommendedMedium",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD33"},
            {TYPE: MISSING, ERROR_CODE: "STD34"},
            # Unlike the other multi-valued columns, the separator is "/".
            {TYPE: CROSSREF, CROSSREF_NAME: "Growth media",
             MULTIPLE: True, SEPARATOR: "/", ERROR_CODE: "STD35"},
        ],
    },
    {
        FIELD: "country",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD54"},
            {TYPE: MISSING, ERROR_CODE: "STD55"},
            {TYPE: ISO, ERROR_CODE: "STD57"},
            # Commented out in the original configuration:
            # {TYPE: CROSSREF, CROSSREF_NAME: COUNTRY_CODES_SHEET,
            #  ERROR_CODE: "STD57"}
        ],
    },
]

# Per-sheet schema, keyed by the sheet names from mirri.settings. Each sheet
# declares an error-code acronym, the column used as identifier, an optional
# sheet-level VALIDATION, optional ROW_VALIDATION rules and its COLUMNS.
SHEETS_SCHEMA = {
    LOCATIONS: {
        "acronym": "GOD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS02"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD01"},
                    {TYPE: MISSING, ERROR_CODE: "GOD02"},
                ],
            },
            {
                FIELD: "Country",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD03"},
                    {TYPE: MISSING, ERROR_CODE: "GOD04"},
                ],
            },
            {FIELD: "Region", VALIDATION: []},
            {FIELD: "City", VALIDATION: []},
            {
                FIELD: "Locality",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD06"},
                    {TYPE: MISSING, ERROR_CODE: "GOD07"},
                ],
            },
        ],
    },
    GROWTH_MEDIA: {
        "acronym": "GMD",
        "id_field": "Acronym",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS01"},
        COLUMNS: [
            {
                FIELD: "Acronym",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD01"},
                    {TYPE: MISSING, ERROR_CODE: "GMD02"},
                ],
            },
            {
                FIELD: "Description",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD03"},
                    {TYPE: MISSING, ERROR_CODE: "GMD04"},
                ],
            },
            {FIELD: "Full description", VALIDATION: []},
        ],
    },
    GENOMIC_INFO: {
        "acronym": "GID",
        "id_field": "Strain AN",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS08"},
        COLUMNS: [
            {
                FIELD: "Strain AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID01"},
                    {TYPE: MISSING, ERROR_CODE: "GID02"},
                    {TYPE: CROSSREF, CROSSREF_NAME: "Strains",
                     ERROR_CODE: "GID03"},
                ],
            },
            {
                FIELD: "Marker",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID04"},
                    {TYPE: MISSING, ERROR_CODE: "GID05"},
                    {TYPE: CROSSREF, CROSSREF_NAME: MARKERS,
                     ERROR_CODE: "GID06"},
                ],
            },
            {
                FIELD: "INSDC AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID07"},
                    {TYPE: MISSING, ERROR_CODE: "GID08"},
                    {TYPE: REGEXP, MATCH: r"^[A-Z]{2}[0-9]{6}$",
                     ERROR_CODE: "GID11"},
                ],
            },
            {FIELD: "Sequence", VALIDATION: []},
        ],
    },
    STRAINS: {
        "acronym": "STD",
        "id_field": "accessionNumber",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
        ROW_VALIDATION: [],
        COLUMNS: STRAIN_FIELDS,
    },
    LITERATURE_SHEET: {
        "acronym": "LID",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
        ROW_VALIDATION: [
            {TYPE: BIBLIO, ERROR_CODE: "LID17"},
        ],
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID01"},
                    {TYPE: MISSING, ERROR_CODE: "LID02"},
                ],
            },
            {
                FIELD: "PMID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID18"},
                ],
            },
            {
                FIELD: "DOI",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID20"},
                ],
            },
            {
                FIELD: "Full reference",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID03"},
                ],
            },
            {
                FIELD: "Authors",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID05"},
                ],
            },
            {
                FIELD: "Title",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID07"},
                ],
            },
            {
                FIELD: "Journal",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID09"},
                ],
            },
            {
                FIELD: "Year",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID11"},
                ],
            },
            {
                FIELD: "Volume",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID13"},
                ],
            },
            {FIELD: "Issue", VALIDATION: []},
            {
                FIELD: "First page",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID15"},
                ],
            },
            {FIELD: "Last page", VALIDATION: []},
            {FIELD: "Book title", VALIDATION: []},
            {FIELD: "Editors", VALIDATION: []},
            {FIELD: "Publisher", VALIDATION: []},
        ],
    },
    # Sheet entries commented out in the original configuration:
    # SEXUAL_STATE_SHEET: {"acronym": "SSD", COLUMNS: []},
    # RESOURCE_TYPES_VALUES: {"acronym": "RTD", COLUMNS: []},
    # FORM_OF_SUPPLY_SHEET: {"acronym": "FSD", COLUMNS: []},
    # PLOIDY_SHEET: {"acronym": "PLD", COLUMNS: []},
    ONTOBIOTOPE: {
        "acronym": "OTD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS06"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD01"},
                    {TYPE: MISSING, ERROR_CODE: "OTD02"},
                ],
            },
            {FIELD: "Name", VALIDATION: []},
        ],
    },
    CONTROL_SHEET: {
        "acronym": "CTR",
        "id_field": "Version",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS09"},
        COLUMNS: [
            {
                FIELD: "Version",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "CTR01"},
                    {TYPE: MISSING, ERROR_CODE: "CTR02"},
                    {TYPE: VERSION, ERROR_CODE: "CTR05"},
                ],
            },
            {
                FIELD: "Date",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "CTR03"},
                    {TYPE: MISSING, ERROR_CODE: "CTR04"},
                ],
            },
        ],
    },
    # This sheet declares no sheet-level VALIDATION entry.
    MARKERS: {
        "acronym": "MKD",
        "id_field": "Acronym",
        COLUMNS: [
            {FIELD: "Acronym", VALIDATION: []},
            {FIELD: "Marker", VALIDATION: []},
        ],
    },
}

# For every sheet, the columns listed for cross references.
CROSS_REF_CONF = {
    ONTOBIOTOPE: ["ID"],
    LITERATURE_SHEET: ["ID", "DOI", "PMID", "Full reference", "Authors",
                       "Title", "Journal", "Year", "Volume", "First page"],
    LOCATIONS: ["ID", "Locality"],
    GROWTH_MEDIA: ["Acronym"],
    STRAINS: ["accessionNumber"],
    SEXUAL_STATE_SHEET: [],
    MARKERS: ["Acronym"],
}

# NOTE(review): the spelling "VALLIDATION" is part of the public name and is
# kept as is, since importers outside this file may reference it.
MIRRI_12052023_VALLIDATION_CONF = {
    "sheet_schema": SHEETS_SCHEMA,
    "cross_ref_conf": CROSS_REF_CONF,
    "keep_sheets_in_memory": [
        {"sheet_name": LOCATIONS, "indexed_by": "Locality"},
    ],
}

# Maps the specification version string to its configuration; the "date" key
# sits alongside it in the same dict.
version_config = {
    "5.1.2": MIRRI_12052023_VALLIDATION_CONF,
    "date": "12/05/2023",
}