CCG_mirri_utils/tests/test_validation.py

590 lines
17 KiB
Python

from datetime import datetime
import unittest
from pathlib import Path
from itertools import chain
from mirri.validation.tags import (
CHOICES,
COORDINATES,
CROSSREF,
CROSSREF_NAME,
DATE,
MATCH,
MISSING,
MULTIPLE,
NUMBER,
REGEXP,
SEPARATOR,
TAXON,
TYPE,
UNIQUE,
VALUES
)
from mirri.validation.excel_validator import (
is_valid_choices,
is_valid_coords,
is_valid_crossrefs,
is_valid_date,
is_valid_missing,
is_valid_number,
is_valid_regex,
is_valid_taxon,
is_valid_unique,
is_valid_file,
validate_mirri_excel,
)
TEST_DATA_DIR = Path(__file__).parent / "data"
TS_VALUE = "value"
TS_CONF = "conf"
TS_ASSERT = "assert_func"
class MirriExcelValidationTests(unittest.TestCase):
def test_validation_structure(self):
in_path = TEST_DATA_DIR / "invalid_structure.mirri.xlsx"
with in_path.open("rb") as fhand:
error_log = validate_mirri_excel(fhand)
entities = []
err_codes = []
for ett, errors in error_log.get_errors().items():
entities.append(ett)
err_codes.extend([err.code for err in errors])
self.assertIn("EFS", entities)
self.assertIn("STD", entities)
self.assertIn("GOD", entities)
self.assertIn("GMD", entities)
self.assertIn("EFS03", err_codes)
self.assertIn("EFS06", err_codes)
self.assertIn("EFS08", err_codes)
self.assertIn("GOD06", err_codes)
self.assertIn("GMD01", err_codes)
self.assertIn("STD05", err_codes)
self.assertIn("STD08", err_codes)
self.assertIn("STD12", err_codes)
def test_validation_content(self):
in_path = TEST_DATA_DIR / "invalid_content.mirri.xlsx"
with in_path.open("rb") as fhand:
error_log = validate_mirri_excel(fhand)
entities = []
err_codes = []
for ett, errors in error_log.get_errors().items():
entities.append(ett)
err_codes.extend([err.code for err in errors])
self.assertTrue(len(err_codes) > 0)
self.assertNotIn("EFS", entities)
self.assertIn("STD", entities)
self.assertIn("GOD", entities)
self.assertIn("GID", entities)
self.assertIn("GOD04", err_codes)
self.assertIn("GOD07", err_codes)
self.assertIn("GID03", err_codes)
self.assertIn("STD11", err_codes)
self.assertIn("STD15", err_codes)
self.assertIn("STD22", err_codes)
self.assertIn("STD04", err_codes)
self.assertIn("STD10", err_codes)
self.assertIn("STD07", err_codes)
self.assertIn("STD14", err_codes)
self.assertIn("STD16", err_codes)
def test_validation_valid(self):
in_path = TEST_DATA_DIR / "valid.mirri.xlsx"
with in_path.open("rb") as fhand:
error_log = validate_mirri_excel(fhand)
self.assertTrue(len(error_log.get_errors()) == 0)
class ValidatoionFunctionsTest(unittest.TestCase):
def test_is_valid_regex(self):
tests = [
{
TS_VALUE: "abcDEF",
TS_CONF: {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "123456",
TS_CONF: {TYPE: REGEXP, MATCH: r"[a-zA-Z]+"},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: "123456",
TS_CONF: {TYPE: REGEXP, MATCH: r"\d+"},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "abcdef",
TS_CONF: {TYPE: REGEXP, MATCH: r"\d+"},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: "abc 123",
TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "123 abc",
TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "123 ",
TS_CONF: {TYPE: REGEXP, MATCH: r"\w+(\s\w+)*$"},
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_regex(value, conf))
def test_is_valid_choices(self):
tests = [
{
TS_VALUE: "1",
TS_CONF: {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "1, 3",
TS_CONF: {
TYPE: CHOICES,
VALUES: ["1", "2", "3", "4"],
MULTIPLE: True,
SEPARATOR: ","
},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "5",
TS_CONF: {TYPE: CHOICES, VALUES: ["1", "2", "3", "4"]},
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_choices(value, conf))
def test_is_valid_crossref(self):
tests = [
{
TS_VALUE: "abc",
TS_CONF: {
TYPE: CROSSREF,
CROSSREF_NAME: "values",
"crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
},
TS_ASSERT: self.assertTrue,
},
{
TS_VALUE: "123",
TS_CONF: {
TYPE: CROSSREF,
CROSSREF_NAME: "values",
"crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
},
TS_ASSERT: self.assertFalse,
},
{
TS_VALUE: "abc, def",
TS_CONF: {
TYPE: CROSSREF,
CROSSREF_NAME: "values",
"crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
MULTIPLE: True,
SEPARATOR: ",",
},
TS_ASSERT: self.assertTrue,
},
{
TS_VALUE: "abc, 123",
TS_CONF: {
TYPE: CROSSREF,
CROSSREF_NAME: "values",
"crossrefs_pointer": {"values": ["abc", "def", "ghi"]},
MULTIPLE: True,
SEPARATOR: ",",
},
TS_ASSERT: self.assertFalse,
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_crossrefs(value, conf))
def test_is_valid_missing(self):
tests = [
{
TS_VALUE: 1,
TS_CONF: {TYPE: MISSING},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "abc",
TS_CONF: {TYPE: MISSING},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: None,
TS_CONF: {TYPE: MISSING},
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_missing(value, conf))
def test_is_valid_date(self):
tests = [
{
TS_VALUE: '2020-04-07',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: '2020/04/07',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: datetime(2021, 5, 1),
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: '2020-05',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: '2020/05',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 2020,
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: '2021 05 01',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: '04-07-2020',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: '2021-02-31',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: '2021-15',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: '15-2021',
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: 3000,
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: -2020,
TS_CONF: {TYPE: DATE},
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_date(value, conf))
def test_is_valid_coordinates(self):
tests = [
{
TS_VALUE: "23; 50",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "-90; -100",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "90; 100",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "0; 0",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "10; 20; 5",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "10; 20; -5",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "91; 50",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: "87; 182",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: "-200; 182",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: "20, 40",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: "abc def",
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: 123,
TS_CONF: {TYPE: COORDINATES},
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_coords(value, conf))
def test_is_valid_number(self):
tests = [
{
TS_VALUE: 1,
TS_CONF: {TYPE: NUMBER},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 2.5,
TS_CONF: {TYPE: NUMBER},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "10",
TS_CONF: {TYPE: NUMBER},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "10.5",
TS_CONF: {TYPE: NUMBER},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 5,
TS_CONF: {TYPE: NUMBER, "min": 0},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 5,
TS_CONF: {TYPE: NUMBER, "max": 10},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 5,
TS_CONF: {TYPE: NUMBER, "min": 0, "max": 10},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "hello",
TS_CONF: {TYPE: NUMBER},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: 10,
TS_CONF: {TYPE: NUMBER, "max": 5},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: 0,
TS_CONF: {TYPE: NUMBER, "min": 5},
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_number(value, conf))
def test_is_valid_taxon(self):
tests = [
{
TS_VALUE: 'sp. species',
TS_CONF: {TYPE: TAXON},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 'spp species subsp. subspecies',
TS_CONF: {TYPE: TAXON},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 'spp species subsp. subspecies var. variety',
TS_CONF: {TYPE: TAXON},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 'spp taxon',
TS_CONF: {TYPE: TAXON},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 'Candidaceae',
TS_CONF: {TYPE: TAXON},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: 'sp sp species',
TS_CONF: {TYPE: TAXON},
TS_ASSERT: self.assertFalse
},
{
TS_VALUE: 'spp species abc. def',
TS_CONF: {TYPE: TAXON},
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_taxon(value, conf))
def test_is_valid_unique(self):
tests = [
{
TS_VALUE: "abc",
TS_CONF: {
TYPE: UNIQUE,
"label": "values",
"shown_values": {}
},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "jkl",
TS_CONF: {
TYPE: UNIQUE,
"label": "values",
"shown_values": {
"values": {"abc": '',
"def": '',
"ghi": ''},
}
},
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: "abc",
TS_CONF: {
TYPE: UNIQUE,
"label": "values",
"shown_values": {
"values": {"abc": '',
"def": '',
"ghi": ''},
}
},
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
conf = test[TS_CONF]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_unique(value, conf))
def test_is_valid_file(self):
tests = [
{
TS_VALUE: TEST_DATA_DIR / "invalid_structure.mirri.xlsx",
TS_ASSERT: self.assertTrue
},
{
TS_VALUE: TEST_DATA_DIR / "invalid_excel.mirri.json",
TS_ASSERT: self.assertFalse
},
]
for test in tests:
value = test[TS_VALUE]
assert_func = test[TS_ASSERT]
with self.subTest(value=value):
assert_func(is_valid_file(value,))
if __name__ == "__main__":
import sys
# sys.argv = ['',
# 'ValidatoionFunctionsTest.test_is_valid_regex']
unittest.main()