# (listing metadata: 546 lines, 15 KiB, Python)
from mirri.validation.tags import (CHOICES, COLUMNS, COORDINATES, CROSSREF, CROSSREF_NAME, DATE,
|
|
ERROR_CODE, FIELD, MANDATORY, MATCH,
|
|
MISSING, MULTIPLE, NAGOYA, NUMBER, REGEXP, ROW_VALIDATION, SEPARATOR, TAXON, TYPE,
|
|
UNIQUE,
|
|
VALIDATION, VALUES, BIBLIO)
|
|
from mirri.settings_v1 import (ONTOBIOTOPE, LOCATIONS, GROWTH_MEDIA, GENOMIC_INFO,
|
|
STRAINS, LITERATURE_SHEET, SEXUAL_STATE_SHEET, MARKERS)
|
|
# GEOGRAPHIC_ORIGIN
|
|
# SEXUAL_STATE_SHEET,
|
|
# RESOURCE_TYPES_VALUES,
|
|
# FORM_OF_SUPPLY_SHEET,
|
|
# PLOIDY_SHEET)
|
|
|
|
|
|
# Pattern for an optionally signed decimal number with an optional exponent
# (e.g. "37", "-4.5", ".5", "1e3"). Shared by the two temperature columns.
_SIGNED_NUMBER_REGEXP = r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'

# Column definitions for the strains sheet; used as the COLUMNS entry of
# SHEETS_SCHEMA[STRAINS].
#
# Every entry names one column through FIELD. The optional VALIDATION list
# holds the checks declared for that column, each a dict with a TYPE, an
# ERROR_CODE and any TYPE-specific options (VALUES, MATCH, MULTIPLE,
# SEPARATOR, CROSSREF_NAME, ...). Entries without a VALIDATION key, or with an
# empty list, declare the column without any check.
STRAIN_FIELDS = [
    {
        FIELD: "Accession number",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD01"},
            {TYPE: UNIQUE, ERROR_CODE: "STD03"},
            {TYPE: MISSING, ERROR_CODE: "STD02"},
            # Two space-free runs separated by a single space.
            {TYPE: REGEXP, MATCH: "[^ ]* [^ ]*", ERROR_CODE: "STD04"},
        ],
    },
    {
        FIELD: "Restrictions on use",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD05"},
            {TYPE: MISSING, ERROR_CODE: "STD06"},
            {
                TYPE: CHOICES,
                VALUES: ["1", "2", "3"],
                MULTIPLE: False,
                ERROR_CODE: "STD07",
            },
        ],
    },
    {
        FIELD: "Nagoya protocol restrictions and compliance conditions",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD08"},
            {TYPE: MISSING, ERROR_CODE: "STD09"},
            {
                TYPE: CHOICES,
                VALUES: ["1", "2", "3"],
                MULTIPLE: False,
                ERROR_CODE: "STD10",
            },
        ],
    },
    {
        FIELD: "ABS related files",
        VALIDATION: [],
    },
    {
        FIELD: "MTA file",
        VALIDATION: [],
    },
    {
        FIELD: "Other culture collection numbers",
        # Disabled check, kept for reference:
        # VALIDATION: [
        #     {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD07",
        #      MULTIPLE: True, SEPARATOR: ";"}
        # ]
    },
    {
        FIELD: "Strain from a registered collection",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD11"},
        ],
    },
    {
        FIELD: "Risk Group",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD12"},
            {TYPE: MISSING, ERROR_CODE: "STD13"},
            {
                TYPE: CHOICES,
                VALUES: ["1", "2", "3", "4"],
                MULTIPLE: False,
                ERROR_CODE: "STD14",
            },
        ],
    },
    {
        FIELD: "Dual use",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD15"},
        ],
    },
    {
        FIELD: "Quarantine in Europe",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD16"},
        ],
    },
    {
        FIELD: "Organism type",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD17"},
            {TYPE: MISSING, ERROR_CODE: "STD18"},
            {
                TYPE: CHOICES,
                # Both the organism names and the digits "1".."9" are accepted.
                VALUES: [
                    "Algae", "Archaea", "Bacteria",
                    "Cyanobacteria", "Filamentous Fungi",
                    "Phage", "Plasmid", "Virus", "Yeast",
                    "1", "2", "3", "4", "5", "6", "7", "8", "9",
                ],
                MULTIPLE: True,
                SEPARATOR: ";",
                ERROR_CODE: "STD19",
            },
        ],
    },
    {
        FIELD: "Taxon name",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD20"},
            {TYPE: MISSING, ERROR_CODE: "STD21"},
            {
                TYPE: TAXON,
                ERROR_CODE: "STD22",
                MULTIPLE: True,
                SEPARATOR: ";",
            },
        ],
    },
    {
        FIELD: "Infrasubspecific names",
    },
    {
        FIELD: "Comment on taxonomy",
    },
    {
        FIELD: "Interspecific hybrid",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD23"},
        ],
    },
    {
        FIELD: "Status",
    },
    {
        FIELD: "History of deposit",
        VALIDATION: [
            # Disabled check, kept for reference (the regex needs modifying):
            # {TYPE: REGEXP, "match": "[^ ]* [^ ]*", ERROR_CODE: "STD24",
            #  MULTIPLE: True, SEPARATOR: ";"}
        ],
    },
    {
        FIELD: "Depositor",
    },
    {
        FIELD: "Date of deposit",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD25"},
        ],
    },
    {
        FIELD: "Date of inclusion in the catalogue",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD26"},
        ],
    },
    {
        FIELD: "Collected by",
    },
    {
        FIELD: "Date of collection",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD27"},
        ],
    },
    {
        FIELD: "Isolated by",
    },
    {
        FIELD: "Date of isolation",
        VALIDATION: [
            {TYPE: DATE, ERROR_CODE: "STD28"},
        ],
    },
    {
        FIELD: "Substrate/host of isolation",
    },
    {
        FIELD: "Tested temperature growth range",
        VALIDATION: [
            {
                TYPE: REGEXP,
                MATCH: _SIGNED_NUMBER_REGEXP,
                ERROR_CODE: "STD29",
                MULTIPLE: True,
                SEPARATOR: ";",
            },
        ],
    },
    {
        FIELD: "Recommended growth temperature",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD30"},
            {TYPE: MISSING, ERROR_CODE: "STD31"},
            {
                TYPE: REGEXP,
                MATCH: _SIGNED_NUMBER_REGEXP,
                ERROR_CODE: "STD32",
                MULTIPLE: True,
                SEPARATOR: ";",
            },
        ],
    },
    {
        FIELD: "Recommended medium for growth",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD33"},
            {TYPE: MISSING, ERROR_CODE: "STD34"},
            {
                TYPE: CROSSREF,
                CROSSREF_NAME: "Growth media",
                MULTIPLE: True,
                # Unlike the other multi-valued columns, this one uses "/".
                SEPARATOR: "/",
                ERROR_CODE: "STD35",
            },
        ],
    },
    {
        FIELD: "Form of supply",
        VALIDATION: [
            {TYPE: MANDATORY, ERROR_CODE: "STD36"},
            {TYPE: MISSING, ERROR_CODE: "STD37"},
            {
                TYPE: CHOICES,
                VALUES: [
                    "Agar", "Cryo", "Dry Ice", "Liquid Culture Medium",
                    "Lyo", "Oil", "Water",
                ],
                MULTIPLE: True,
                SEPARATOR: ";",
                ERROR_CODE: "STD38",
            },
        ],
    },
    {
        FIELD: "Other denomination",
    },
    {
        FIELD: "Coordinates of geographic origin",
        VALIDATION: [
            {TYPE: COORDINATES, ERROR_CODE: "STD39"},
        ],
    },
    {
        FIELD: "Altitude of geographic origin",
        VALIDATION: [
            {TYPE: NUMBER, "max": 8000, "min": -200, ERROR_CODE: "STD40"},
        ],
    },
    {
        # Value can be in the cell or in another sheet. Don't configure this.
        FIELD: "Geographic origin",
    },
    {
        FIELD: "Isolation habitat",
    },
    {
        FIELD: "Ontobiotope term for the isolation habitat",
        VALIDATION: [
            {
                TYPE: CROSSREF,
                CROSSREF_NAME: "Ontobiotope",
                MULTIPLE: True,
                SEPARATOR: ";",
                ERROR_CODE: "STD41",
            },
        ],
    },
    {
        FIELD: "GMO",
        VALIDATION: [
            {TYPE: CHOICES, VALUES: ["1", "2"], ERROR_CODE: "STD42"},
        ],
    },
    {
        FIELD: "GMO construction information",
    },
    {
        FIELD: "Mutant information",
    },
    {
        FIELD: "Genotype",
    },
    {
        FIELD: "Sexual state",
        VALIDATION: [
            {
                TYPE: CROSSREF,
                CROSSREF_NAME: SEXUAL_STATE_SHEET,
                ERROR_CODE: "STD43",
            },
            # Disabled alternative with a fixed list, kept for reference:
            # {TYPE: CHOICES, VALUES: ["Mata", "Matalpha", "Mata/Matalpha",
            #  "Matb", "Mata/Matb", "MTLa", "MTLalpha", "MTLa/MTLalpha",
            #  "MAT1-1", "MAT1-2", "MAT1", "MAT2", "MT+", "MT-"],
            #  ERROR_CODE: "STD43"}
        ],
    },
    {
        FIELD: "Ploidy",
        VALIDATION: [
            {
                TYPE: CHOICES,
                VALUES: ["0", "1", "2", "3", "4", "9"],
                ERROR_CODE: "STD44",
            },
        ],
    },
    {
        FIELD: "Plasmids",
    },
    {
        FIELD: "Plasmids collections fields",
    },
    {
        # Value can be in the cell or in another sheet.
        # NOTE(review): this entry used to carry a "Don't configure this"
        # remark, yet a cross-reference check is configured below -- confirm
        # which one is intended.
        FIELD: "Literature",
        VALIDATION: [
            {
                TYPE: CROSSREF,
                CROSSREF_NAME: LITERATURE_SHEET,
                MULTIPLE: True,
                SEPARATOR: ";",
                ERROR_CODE: "STD45",
            },
        ],
    },
    {
        FIELD: "Plant pathogenicity code",
    },
    {
        FIELD: "Pathogenicity",
    },
    {
        FIELD: "Enzyme production",
    },
    {
        FIELD: "Production of metabolites",
    },
    {
        FIELD: "Applications",
    },
    {
        FIELD: "Remarks",
    },
    {
        FIELD: "Literature linked to the sequence/genome",
    },
]
|
|
# Schema of every sheet known to the validator, keyed by sheet name.
#
# Each sheet entry carries:
#   "acronym"       short code; it is also the prefix of that sheet's column
#                   error codes (e.g. "GOD" -> "GOD01").
#   "id_field"      name of the column holding the row identifier.
#   VALIDATION      sheet-level check (absent for the markers sheet).
#   ROW_VALIDATION  optional list of checks declared per row.
#   COLUMNS         list of column definitions (FIELD plus VALIDATION steps).
SHEETS_SCHEMA = {
    LOCATIONS: {
        "acronym": "GOD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS02"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD01"},
                    {TYPE: MISSING, ERROR_CODE: "GOD02"},
                ],
            },
            {
                FIELD: "Country",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD03"},
                    {TYPE: MISSING, ERROR_CODE: "GOD04"},
                ],
            },
            {FIELD: "Region", VALIDATION: []},
            {FIELD: "City", VALIDATION: []},
            {
                FIELD: "Locality",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GOD06"},
                    {TYPE: MISSING, ERROR_CODE: "GOD07"},
                ],
            },
        ],
    },
    GROWTH_MEDIA: {
        "acronym": "GMD",
        "id_field": "Acronym",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS01"},
        COLUMNS: [
            {
                FIELD: "Acronym",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD01"},
                    {TYPE: MISSING, ERROR_CODE: "GMD02"},
                ],
            },
            {
                FIELD: "Description",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GMD03"},
                    {TYPE: MISSING, ERROR_CODE: "GMD04"},
                ],
            },
            {FIELD: "Full description", VALIDATION: []},
        ],
    },
    GENOMIC_INFO: {
        "acronym": "GID",
        "id_field": "Strain AN",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS08"},
        COLUMNS: [
            {
                FIELD: "Strain AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID01"},
                    {TYPE: MISSING, ERROR_CODE: "GID02"},
                    {
                        TYPE: CROSSREF,
                        CROSSREF_NAME: "Strains",
                        ERROR_CODE: "GID03",
                    },
                ],
            },
            {
                FIELD: "Marker",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID04"},
                    {TYPE: MISSING, ERROR_CODE: "GID05"},
                    {
                        TYPE: CROSSREF,
                        CROSSREF_NAME: MARKERS,
                        ERROR_CODE: "GID06",
                    },
                ],
            },
            {
                FIELD: "INSDC AN",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "GID07"},
                    {TYPE: MISSING, ERROR_CODE: "GID08"},
                ],
            },
            {FIELD: "Sequence", VALIDATION: []},
        ],
    },
    STRAINS: {
        "acronym": "STD",
        "id_field": "Accession number",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS05"},
        ROW_VALIDATION: [
            {TYPE: NAGOYA, ERROR_CODE: "STD46"},
        ],
        # The strain columns are long enough to live in their own constant.
        COLUMNS: STRAIN_FIELDS,
    },
    LITERATURE_SHEET: {
        "acronym": "LID",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS03"},
        ROW_VALIDATION: [
            {TYPE: BIBLIO, ERROR_CODE: "LID17"},
        ],
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID01"},
                    {TYPE: MISSING, ERROR_CODE: "LID02"},
                ],
            },
            {
                FIELD: "Full reference",
                VALIDATION: [{TYPE: MANDATORY, ERROR_CODE: "LID03"}],
            },
            {
                FIELD: "Authors",
                VALIDATION: [{TYPE: MANDATORY, ERROR_CODE: "LID05"}],
            },
            {
                FIELD: "Title",
                VALIDATION: [{TYPE: MANDATORY, ERROR_CODE: "LID07"}],
            },
            {
                FIELD: "Journal",
                VALIDATION: [{TYPE: MANDATORY, ERROR_CODE: "LID09"}],
            },
            {
                FIELD: "Year",
                VALIDATION: [{TYPE: MANDATORY, ERROR_CODE: "LID11"}],
            },
            {
                FIELD: "Volume",
                VALIDATION: [{TYPE: MANDATORY, ERROR_CODE: "LID13"}],
            },
            {FIELD: "Issue", VALIDATION: []},
            {
                FIELD: "First page",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "LID15"},
                    {TYPE: MISSING, ERROR_CODE: "LID16"},
                ],
            },
            {FIELD: "Last page", VALIDATION: []},
            {FIELD: "Book title", VALIDATION: []},
            {FIELD: "Editors", VALIDATION: []},
            {FIELD: "Publisher", VALIDATION: []},
        ],
    },
    # Sheets that are currently not part of the schema:
    # SEXUAL_STATE_SHEET: {"acronym": "SSD", COLUMNS: []},
    # RESOURCE_TYPES_VALUES: {"acronym": "RTD", COLUMNS: []},
    # FORM_OF_SUPPLY_SHEET: {"acronym": "FSD", COLUMNS: []},
    # PLOIDY_SHEET: {"acronym": "PLD", COLUMNS: []},
    ONTOBIOTOPE: {
        "acronym": "OTD",
        "id_field": "ID",
        VALIDATION: {TYPE: MANDATORY, ERROR_CODE: "EFS06"},
        COLUMNS: [
            {
                FIELD: "ID",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD01"},
                    {TYPE: MISSING, ERROR_CODE: "OTD02"},
                ],
            },
            {
                FIELD: "Name",
                VALIDATION: [
                    {TYPE: MANDATORY, ERROR_CODE: "OTD03"},
                    {TYPE: MISSING, ERROR_CODE: "OTD04"},
                ],
            },
        ],
    },
    # The markers sheet declares no sheet-level VALIDATION entry.
    MARKERS: {
        "acronym": "MKD",
        "id_field": "Acronym",
        COLUMNS: [
            {FIELD: "Acronym", VALIDATION: []},
            {FIELD: "Marker", VALIDATION: []},
        ],
    },
}
|
|
|
|
# Cross-reference configuration: sheet name -> list of column names of that
# sheet. Presumably these are the columns whose values other sheets may refer
# to through CROSSREF checks -- confirm against the validator.
# The sexual-state sheet is listed with no columns.
CROSS_REF_CONF = {
    ONTOBIOTOPE: ["ID", "Name"],
    LITERATURE_SHEET: ["ID"],
    LOCATIONS: ["Locality"],
    GROWTH_MEDIA: ["Acronym"],
    STRAINS: ["Accession number"],
    SEXUAL_STATE_SHEET: [],
    MARKERS: ["Acronym"],
}
|
|
|
|
# Top-level validation configuration bundling the sheet schema and the
# cross-reference settings.
# NOTE: the constant keeps its existing spelling ("VALLIDATION"); renaming it
# would break any module that imports it under this name.
MIRRI_20200601_VALLIDATION_CONF = {
    "sheet_schema": SHEETS_SCHEMA,
    "cross_ref_conf": CROSS_REF_CONF,
    # Only the locations sheet is listed here, indexed by its "Locality"
    # column.
    "keep_sheets_in_memory": [
        {"sheet_name": LOCATIONS, "indexed_by": "Locality"},
    ],
}
|