""" Created on 2020(e)ko abe. 1(a) @author: peio """ from __future__ import annotations import re from collections import OrderedDict from copy import deepcopy from typing import List, Union import pycountry from mirri import ValidationError from mirri.entities._private_classes import _FieldBasedClass, FrozenClass from mirri.entities.date_range import DateRange from mirri.entities.location import Location from mirri.entities.publication import Publication from mirri.entities.sequence import GenomicSequence from mirri.settings import ( ABS_RELATED_FILES, ACCESSION_NAME, ACCESSION_NUMBER, ALLOWED_FORMS_OF_SUPPLY, ALLOWED_MARKER_TYPES, ALLOWED_NAGOYA_OPTIONS, ALLOWED_PLOIDIES, ALLOWED_RESTRICTION_USE_OPTIONS, ALLOWED_RISK_GROUPS, ALLOWED_SUBTAXA, ALLOWED_TAXONOMIC_RANKS, APPLICATIONS, COLLECT, COLLECTED_BY, COLLECTION_CODE, COMMENTS_ON_TAXONOMY, DATE_OF_COLLECTION, DATE_OF_INCLUSION, DATE_OF_ISOLATION, DEPOSIT, DEPOSITOR, DUAL_USE, ENZYME_PRODUCTION, FORM_OF_SUPPLY, GENETICS, GENOTYPE, GENUS, GMO, GMO_CONSTRUCTION_INFO, GROWTH, HISTORY_OF_DEPOSIT, INFRASUBSPECIFIC_NAME, INTERSPECIFIC_HYBRID, ISOLATED_BY, ISOLATION, ISOLATION_HABITAT, LITERATURE_LINKED_TO_SEQ_GENOME, LOCATION, MARKER_INSDC, MARKER_SEQ, MARKER_TYPE, MARKERS, MTA_FILES, MUTANT_INFORMATION, NAGOYA_PROTOCOL, ONTOBIOTOPE_ISOLATION_HABITAT, ORGANISM_TYPE, OTHER_CULTURE_NUMBERS, PATHOGENICITY, PLANT_PATHOGENICITY_CODE, PLASMIDS, PLASMIDS_COLLECTION_FIELDS, PLOIDY, PRODUCTION_OF_METABOLITES, PUBLICATIONS, QUARANTINE, RECOMMENDED_GROWTH_MEDIUM, RECOMMENDED_GROWTH_TEMP, REMARKS, RESTRICTION_ON_USE, RISK_GROUP, SEXUAL_STATE, SPECIES, STATUS, STRAIN_FROM_REGISTERED_COLLECTION, STRAIN_ID, STRAIN_PUI, STRAIN_URL, SUBSTRATE_HOST_OF_ISOLATION, ID_SYNONYMS, TAXONOMY, TESTED_TEMPERATURE_GROWTH_RANGE, SUBTAXAS, DATE_OF_DEPOSIT, HYBRIDS, ) RANK_TRANSLATOR = { "subspecies": "subsp.", "convarietas": "convar.", "variety": "var.", "group": "Group", "forma": "f.", "forma.specialis": 'f.sp.' } # ORG_TYPES = { # "algae": 1, # "archaea": 2, # "bacteria": 3, # "fungi": 4, # "virus": 5, # "yeast": 6, # } ORG_TYPES = { "Algae": 1, "Archaea": 2, "Bacteria": 3, "Cyanobacteria": 4, "Filamentous Fungi": 5, "Phage": 6, "Plasmid": 7, "Virus": 8, "Yeast": 9, } class OrganismType(FrozenClass): def __init__(self, value=None): self._data = {} self.guess_type(value) self._freeze() def dict(self): return self._data def __str__(self): return f"{self.code} {self.name}" @property def code(self): return self._data.get("code", None) @code.setter def code(self, code: int): try: code = int(code) except TypeError as error: msg = f"code {code} not accepted for organism type" raise ValidationError(msg) from error if code not in ORG_TYPES.values(): msg = f"code {code} not accepted for organism type" raise ValidationError(msg) self._data["code"] = code name = None for _name, _code in ORG_TYPES.items(): if _code == code: name = _name self._data["name"] = name @property def name(self): return self._data.get("name", None) @name.setter def name(self, name: str): error_msg = f"name {name} not accepted for organism type" accepted_types = ORG_TYPES.keys() if name not in accepted_types: raise ValidationError(error_msg) self._data["name"] = name # TODO: are we case sensitive? self._data["code"] = ORG_TYPES[name] def guess_type(self, value): if value is None or not value: raise ValueError(" Can not set an empty value") try: value = int(value) self.code = value except ValueError: self.name = value class Taxonomy(FrozenClass): def __init__(self, data=None): self._data = {} if data is not None: if ORGANISM_TYPE in data: self.organism_type = [OrganismType(ot) for ot in data[ORGANISM_TYPE]] if GENUS in data: self.genus = data[GENUS] if SPECIES in data: self.species = data[SPECIES] if INFRASUBSPECIFIC_NAME in data: self.infrasubspecific_name = data[INFRASUBSPECIFIC_NAME] if COMMENTS_ON_TAXONOMY in data: self.comments = data[COMMENTS_ON_TAXONOMY] if INTERSPECIFIC_HYBRID in data: self.interspecific_hybrid = data[INTERSPECIFIC_HYBRID] if HYBRIDS in data: self.hybrids = data[HYBRIDS] self._freeze() def __bool__(self): return bool(self._data) def dict(self): data = {} for key, value in self._data.items(): if value is None: continue if key == ORGANISM_TYPE: value = [val.dict() for val in value] data[key] = value return data def __getitem__(self, key): return self._data[key] @property def organism_type(self): return self._data.get(ORGANISM_TYPE, None) @organism_type.setter def organism_type(self, organism_type: List[OrganismType]): if isinstance(organism_type, list) and all( isinstance(x, OrganismType) for x in organism_type ): self._data[ORGANISM_TYPE] = organism_type else: msg = "organism_type must be a list of OrganismType instances" raise ValidationError(msg) @property def infrasubspecific_name(self): return self._data.get(INFRASUBSPECIFIC_NAME, None) @infrasubspecific_name.setter def infrasubspecific_name(self, name): self._data[INFRASUBSPECIFIC_NAME] = name @property def comments(self): return self._data.get(COMMENTS_ON_TAXONOMY, None) @comments.setter def comments(self, comments): self._data[COMMENTS_ON_TAXONOMY] = comments @property def interspecific_hybrid(self): return self._data.get(INTERSPECIFIC_HYBRID, None) @interspecific_hybrid.setter def interspecific_hybrid(self, interspecific_hybrid): self._data[INTERSPECIFIC_HYBRID] = interspecific_hybrid @property def genus(self): return self._data.get(GENUS, {}).get("name", None) @genus.setter def genus(self, genus): if GENUS not in self._data: self._data[GENUS] = {} self._data[GENUS]["name"] = genus @property def species(self): return self._data.get(SPECIES, {}).get("name", None) @species.setter def species(self, species): self._data[SPECIES] = {"name": species} @property def species_author(self): return self._data.get(SPECIES, {}).get("author", None) @species_author.setter def species_author(self, species_author): if not self.species: msg = "Can not set species author if species is not set" raise ValidationError(msg) self._data[SPECIES]["author"] = species_author @property def hybrids(self) -> list[str]: return self._data.get(HYBRIDS, None) @hybrids.setter def hybrids(self, hybrids: List[str]): if isinstance(hybrids, (tuple, list)): self._data[HYBRIDS] = hybrids @property def subtaxas(self): return { key: value for key, value in self._data.items() if key in ALLOWED_SUBTAXA } def get_subtaxa_name(self, rank): return self._data.get(rank, {}).get("name", None) def get_subtaxa_author(self, rank): return self._data.get(rank, {}).get("author", None) def set_subtaxa_name(self, rank, name): if rank in ALLOWED_SUBTAXA: self._data[rank] = {"name": name} def set_subtaxa_author(self, rank, author): if rank in ALLOWED_SUBTAXA and self.get_subtaxa_name(rank): self._data[rank]["author"] = author def add_subtaxa(self, subtaxa_rank, subtaxa_name, subtaxa_author=None): if subtaxa_rank not in ALLOWED_SUBTAXA: raise ValidationError("{} Rank not allowed".format(subtaxa_rank)) if subtaxa_rank not in self._data: self._data[subtaxa_rank] = {} self._data[subtaxa_rank] = {"name": subtaxa_name} if subtaxa_author: self._data[subtaxa_rank]["author"] = subtaxa_author @property def long_name(self): # from multicrop passport descriptors 2.1 # ‘subsp.’ (for subspecies); ‘convar.’ (for convariety); # ‘var.’ (for variety); ‘f.’ (for form); # ‘Group’ (for ‘cultivar group’) # f.sp. for forma.specialis if self.hybrids: return ';'.join(self.hybrids) taxas = [] for rank in ALLOWED_TAXONOMIC_RANKS: value = self.get_subtaxa_name(rank) if value: rank = RANK_TRANSLATOR.get(rank, None) if rank: taxas.append(rank) taxas.append(value) return " ".join(taxas) if taxas else None @property def taxons(self): taxons = OrderedDict() for rank in ALLOWED_TAXONOMIC_RANKS: taxa = self._data.get(rank, {}).get("name", None) author = self._data.get(rank, {}).get("author", None) if taxa: if author: taxa += " " + author taxons[rank] = taxa return taxons @property def composed_taxons(self): taxas = [] for rank in ALLOWED_TAXONOMIC_RANKS: value = self.get_subtaxa_name(rank) # print(value, rank) if value: rank_trans = RANK_TRANSLATOR.get(rank, None) if rank_trans: taxas.extend([rank_trans, value]) else: taxas.append(value) yield rank, " ".join(taxas) if rank == "family": taxas = [] class _GeneralStep(FrozenClass): _date_tag = None _who_tag = None _location_tag = None def __init__(self, data=None): self._data = {} if data is None: data = {} if self._location_tag is not None: self.location = Location(data.get(self._location_tag, None)) if self._date_tag: self.who = data.get(self._who_tag, None) if self._date_tag: _date = DateRange() if data and self._date_tag in data: _date = _date.strpdate(data[self._date_tag]) self.date = _date def __bool__(self): return bool(self.location) or bool(self.date) or bool(self.who) @property def location(self) -> Location: return self._data.get(self._location_tag, None) @location.setter def location(self, location: Location): if self._location_tag is None: return ValidationError("Can't set location on this class") if not isinstance(location, Location): raise ValidationError("Location must be a Location instance") self._data[self._location_tag] = location @property def who(self) -> str: return self._data.get(self._who_tag, None) @who.setter def who(self, by_who: str): if self._who_tag is None: return ValidationError("Can set who on this class") self._data[self._who_tag] = by_who @property def date(self) -> DateRange: return self._data.get(self._date_tag, None) @date.setter def date(self, _date: DateRange): if self._date_tag is None: return ValidationError("Can set date on this class") if _date is not None: if not isinstance(_date, DateRange): raise ValidationError("Date must be a DateRange instance") self._data[self._date_tag] = _date def dict(self): _data = {} if self.location: _data[self._location_tag] = self.location.dict() if self.who: _data[self._who_tag] = self._data[self._who_tag] if self.date: _data[self._date_tag] = self._data[self._date_tag].strfdate return _data class Collect(_GeneralStep): _date_tag = DATE_OF_COLLECTION _who_tag = COLLECTED_BY _location_tag = LOCATION def __init__(self, data=None): super().__init__(data=data) if data is None: data = {} self.habitat = data.get(ISOLATION_HABITAT, None) self.habitat_ontobiotope = data.get(ONTOBIOTOPE_ISOLATION_HABITAT, None) self._freeze() def __str__(self): info = "" if self.location: info += f"{pycountry.countries.get(alpha_3=str(self.location.country)).name}" if self.date: info += f" in {self.date.strfdate}" if self.who: info += f" by {self.who}" if info: info = f"Collected: {info}" return info def dict(self): _data = super().dict() if ISOLATION_HABITAT in self._data: _data[ISOLATION_HABITAT] = self._data[ISOLATION_HABITAT] if ONTOBIOTOPE_ISOLATION_HABITAT in self._data: ontotype = self._data[ONTOBIOTOPE_ISOLATION_HABITAT] _data[ONTOBIOTOPE_ISOLATION_HABITAT] = ontotype return _data @property def habitat(self): return self._data.get(ISOLATION_HABITAT, None) @habitat.setter def habitat(self, habitat: str): if habitat is not None: self._data[ISOLATION_HABITAT] = habitat @property def habitat_ontobiotope(self): return self._data.get(ONTOBIOTOPE_ISOLATION_HABITAT, None) @habitat_ontobiotope.setter def habitat_ontobiotope(self, habitat: str): if habitat is not None: if not re.match("OB[ST]:[0-9]{6}", habitat): raise ValidationError( f"Bad ontobiotope format, {habitat}") self._data[ONTOBIOTOPE_ISOLATION_HABITAT] = habitat class Isolation(_GeneralStep): _who_tag = ISOLATED_BY _date_tag = DATE_OF_ISOLATION def __init__(self, data=None): if data is None: data = {} super().__init__(data=data) _date = DateRange() self.substrate_host_of_isolation = data.get(SUBSTRATE_HOST_OF_ISOLATION, None) self._freeze() def dict(self): _data = super().dict() return _data @property def substrate_host_of_isolation(self): return self._data.get(SUBSTRATE_HOST_OF_ISOLATION, None) @substrate_host_of_isolation.setter def substrate_host_of_isolation(self, value: str): if value is not None: self._data[SUBSTRATE_HOST_OF_ISOLATION] = value class Deposit(_GeneralStep): _who_tag = DEPOSITOR _date_tag = DATE_OF_DEPOSIT def __init__(self, data=None): if data is None: data = {} super().__init__(data=data) self._freeze() class StrainId(FrozenClass): def __init__(self, id_dict=None, collection=None, number=None): if id_dict and (collection or number): msg = "Can not initialize with dict and number or collection" raise ValidationError(msg) if id_dict is None: id_dict = {} self._id_dict = id_dict if collection: self.collection = collection if number: self.number = number self._freeze() def __bool__(self): return bool(self._id_dict) def __eq__(self, other): return self.collection == other.collection and self.number == other.number def __ne__(self, other): return not self.__eq__(other) def __str__(self): if self.number is None and self.collection is None: return None _id = '' if self.collection is not None: _id += f'{self.collection} ' _id += self.number return _id def dict(self): return self._id_dict @property def strain_id(self): return self.__str__() @property def collection(self): return self._id_dict.get(COLLECTION_CODE, None) @collection.setter def collection(self, collection): assert collection and isinstance(collection, str) self._id_dict[COLLECTION_CODE] = collection @property def number(self): return self._id_dict.get(ACCESSION_NUMBER, None) @number.setter def number(self, germplasm_number): assert germplasm_number and isinstance(germplasm_number, str) self._id_dict[ACCESSION_NUMBER] = germplasm_number @property def pui(self): return self._id_dict.get(STRAIN_PUI, None) @pui.setter def pui(self, pui): assert pui and isinstance(pui, str) self._id_dict[STRAIN_PUI] = pui @property def url(self): return self._id_dict.get(STRAIN_URL, None) @url.setter def url(self, url): assert url and isinstance(url, str) self._id_dict[STRAIN_URL] = url def keys(self): return self._id_dict.keys() def copy(self): return StrainId(self._id_dict) class Genetics(FrozenClass): def __init__(self, data=None): self._data = {} if data and SEXUAL_STATE in data: self.sexual_state = data[SEXUAL_STATE] if data and PLOIDY in data: self.ploidy = data[PLOIDY] if data and GMO in data: self.gmo = data[GMO] if data and MUTANT_INFORMATION in data: self.mutant_info = data[MUTANT_INFORMATION] if data and GMO_CONSTRUCTION_INFO in data: self.gmo_construction = data[GMO_CONSTRUCTION_INFO] if data and GENOTYPE in data: self.genotype = data[GENOTYPE] if data and MARKERS in data: self.markers = [ GenomicSequence(marker_data) for marker_data in data[MARKERS] ] else: self.markers = [] self._freeze() def __bool__(self): data = deepcopy(self._data) if MARKERS in data: markers = data.pop(MARKERS) return bool(markers or data) else: return bool(data) def dict(self): data = {} for key, value in self._data.items(): if value is None or value == []: continue elif isinstance(value, list): a = [] for v in value: if not isinstance(v, str): a.append(v.dict()) else: a.append(v) value = a data[key] = value return data @property def sexual_state(self) -> str: return self._data.get(SEXUAL_STATE, None) @sexual_state.setter def sexual_state(self, state: str): self._data[SEXUAL_STATE] = state @property def ploidy(self) -> int: return self._data.get(PLOIDY, None) @ploidy.setter def ploidy(self, value: int): if value is not None: if value not in ALLOWED_PLOIDIES: msg = f"{value} not in allowed ploidies: " msg += f'{", ".join(str(p) for p in ALLOWED_PLOIDIES)}' raise ValidationError(msg) self._data[PLOIDY] = value @property def gmo(self) -> bool: return self._data.get(GMO, None) @gmo.setter def gmo(self, value: bool): if value is not None and not isinstance(value, bool): raise ValidationError("Gmos value must be boolean") self._data[GMO] = value @property def gmo_construction(self) -> str: return self._data.get(GMO_CONSTRUCTION_INFO, None) @gmo_construction.setter def gmo_construction(self, value: str): self._data[GMO_CONSTRUCTION_INFO] = value @property def mutant_info(self) -> str: return self._data.get(MUTANT_INFORMATION, None) @mutant_info.setter def mutant_info(self, value: str): self._data[MUTANT_INFORMATION] = value @property def genotype(self) -> str: return self._data.get(GENOTYPE, None) @genotype.setter def genotype(self, value: str): self._data[GENOTYPE] = value @property def plasmids(self) -> List[str]: return self._data.get(PLASMIDS, None) @plasmids.setter def plasmids(self, value: List[str]): self._data[PLASMIDS] = value @property def plasmids_in_collections(self): return self._data.get(PLASMIDS_COLLECTION_FIELDS, None) @plasmids_in_collections.setter def plasmids_in_collections(self, value: List[str]): self._data[PLASMIDS_COLLECTION_FIELDS] = value @property def markers(self) -> List[GenomicSequence]: return self._data.get(MARKERS, None) @markers.setter def markers(self, value: List[GenomicSequence]): for marker in value: if not isinstance(marker, GenomicSequence): msg = "Markers needs to be a GenomicSecuence instances list" raise ValidationError(msg) self._data[MARKERS] = value class Growth(_FieldBasedClass): _fields = [ {"attribute": "tested_temp_range", "label": TESTED_TEMPERATURE_GROWTH_RANGE}, {"attribute": "recommended_media", "label": RECOMMENDED_GROWTH_MEDIUM}, {"attribute": "recommended_temp", "label": RECOMMENDED_GROWTH_TEMP}, ] @property def tested_temp_range(self) -> dict: return self._data.get(TESTED_TEMPERATURE_GROWTH_RANGE, None) @tested_temp_range.setter def tested_temp_range(self, val: dict): if val is not None: if "min" in val and "max" in val: self._data[TESTED_TEMPERATURE_GROWTH_RANGE] = val else: msg = "A dict with min and max is required" raise ValidationError(msg) @property def recommended_media(self) -> List[str]: return self._data.get(RECOMMENDED_GROWTH_MEDIUM, None) @recommended_media.setter def recommended_media(self, value): if value is not None: if not isinstance(value, (list, set)): msg = "Recommendedn media must be a list" raise ValidationError(msg) self._data[RECOMMENDED_GROWTH_MEDIUM] = value @property def recommended_temp(self) -> dict: return self._data.get(RECOMMENDED_GROWTH_TEMP, None) @recommended_temp.setter def recommended_temp(self, val: dict): if val is not None: if isinstance(val, dict) and "min" in val and "max" in val: self._data[RECOMMENDED_GROWTH_TEMP] = val else: msg = "A dict with min and max is required" raise ValidationError(msg) class Strain(FrozenClass): def __init__(self, data=None): self._data = {} if data is None: data = {} self.nagoya_protocol = data.get(NAGOYA_PROTOCOL, None) self.risk_group = data.get(RISK_GROUP, None) self.restriction_on_use = data.get(RESTRICTION_ON_USE, None) self.status = data.get(STATUS, None) self.abs_related_files = data.get(ABS_RELATED_FILES, None) self.mta_files = data.get(MTA_FILES, None) self.is_potentially_harmful = data.get(DUAL_USE, None) self.is_from_registered_collection = data.get( STRAIN_FROM_REGISTERED_COLLECTION, None ) self.is_subject_to_quarantine = data.get(QUARANTINE, None) inclusion_date = data.get(DATE_OF_INCLUSION, None) if inclusion_date: _date = DateRange() inclusion_date = _date.strpdate(inclusion_date) self.catalog_inclusion_date = inclusion_date self.id = StrainId(data.get(STRAIN_ID, None)) self.taxonomy = Taxonomy(data.get(TAXONOMY, None)) self.deposit = Deposit(data.get(DEPOSIT, None)) self.collect = Collect(data.get(COLLECT, None)) self.isolation = Isolation(data.get(ISOLATION, None)) self.growth = Growth(data.get(GROWTH, None)) self.genetics = Genetics(data.get(GENETICS, None)) self.other_numbers = [] if data and OTHER_CULTURE_NUMBERS in data: for other_number in data[OTHER_CULTURE_NUMBERS]: self.other_numbers.append(StrainId(other_number)) self.publications = [] if data and PUBLICATIONS in data: for pub in data[PUBLICATIONS]: self.publications.append(Publication(pub)) self._freeze() def __str__(self): return f"Strain {self.id.collection} {self.id.number}" def dict(self): data = {} for field, value in self._data.items(): if field in [STRAIN_ID, COLLECT, DEPOSIT, ISOLATION, GROWTH, GENETICS, TAXONOMY]: value = value.dict() if value == {}: value = None elif field in [OTHER_CULTURE_NUMBERS, PUBLICATIONS, ID_SYNONYMS]: value = [item.dict() for item in value] if value == []: value = None elif field == DATE_OF_INCLUSION: value = value.strfdate 0 if value is not None: data[field] = value return data @property def id(self) -> StrainId: return self._data.get(STRAIN_ID, None) @id.setter def id(self, _id: StrainId): self._data[STRAIN_ID] = _id @property def synonyms(self) -> List[StrainId]: return self._data.get(ID_SYNONYMS, None) @synonyms.setter def synonyms(self, ids: List[StrainId]): self._data[ID_SYNONYMS] = ids @property def nagoya_protocol(self) -> str: return self._data.get(NAGOYA_PROTOCOL, None) @nagoya_protocol.setter def nagoya_protocol(self, nagoya): if nagoya is not None: if nagoya not in ALLOWED_NAGOYA_OPTIONS: msg = "The 'Nagoya protocol restrictions and compliance " msg += "conditions' for strain with Accession Number " msg += f"{self.id.collection}{self.id.number} is not " msg += "according to the specification." # msg = f"Nagoya protocol options not matched: {nagoya}" # msg += f' options: {", ".join(ALLOWED_NAGOYA_OPTIONS)}' raise ValidationError(msg) self._data[NAGOYA_PROTOCOL] = nagoya @property def risk_group(self) -> str: return self._data.get(RISK_GROUP, None) @risk_group.setter def risk_group(self, risk_gr: Union[str, int, None]): # we have to check if there are some more options if risk_gr is not None: risk_gr = str(risk_gr) if risk_gr not in ALLOWED_RISK_GROUPS: msg = "The 'Risk group' for strain with Accession Number " msg += f"{self.id.collection}{self.id.number} is not according " msg += "to specification." # msg = f"Value ({risk_gr}) not in the allowed options: " # msg += f"{', '.join(ALLOWED_RISK_GROUPS)}" raise ValidationError(msg) self._data[RISK_GROUP] = str(risk_gr) @property def restriction_on_use(self) -> Union[str, None]: return self._data.get(RESTRICTION_ON_USE, None) @restriction_on_use.setter def restriction_on_use(self, restriction: str): if restriction is not None: if restriction not in ALLOWED_RESTRICTION_USE_OPTIONS: msg = "The 'Restriction on use' for strain with Accession " msg += f"Number {self.id.collection} {self.id.number} is not " msg += "according to the specification." raise ValidationError(msg) self._data[RESTRICTION_ON_USE] = restriction @property def is_potentially_harmful(self) -> bool: # can_be_use_as_weapon return self._data.get(DUAL_USE, None) @is_potentially_harmful.setter def is_potentially_harmful(self, is_harmful: bool): # Specify whether the strain has the potential for a harmful use # according to import pprint # EU Council Regulation 2000/1334/CEand its amendments # and corrections if is_harmful is not None: if not isinstance(is_harmful, bool): msg = "is_potentially harmful must be True/False" raise ValidationError(msg) self._data[DUAL_USE] = is_harmful @property def is_subject_to_quarantine(self) -> bool: return self._data[QUARANTINE] @is_subject_to_quarantine.setter def is_subject_to_quarantine(self, quarantine: bool): if quarantine is not None and not isinstance(quarantine, bool): msg = "Is subject to quarantine must be boolean" raise ValidationError(msg) self._data[QUARANTINE] = quarantine @property def is_from_registered_collection(self) -> bool: return self._data.get(STRAIN_FROM_REGISTERED_COLLECTION, None) @is_from_registered_collection.setter def is_from_registered_collection(self, value: bool): if value is not None: if not isinstance(value, bool): msg = "is from reg_collection must be boolean" raise ValidationError(msg) self._data[STRAIN_FROM_REGISTERED_COLLECTION] = value @property def catalog_inclusion_date(self) -> DateRange: return self._data.get(DATE_OF_INCLUSION, None) @catalog_inclusion_date.setter def catalog_inclusion_date(self, _date: Union[None, DateRange]): if _date is not None: if not isinstance(_date, DateRange): raise ValidationError("Date must be a DateRange instance") self._data[DATE_OF_INCLUSION] = _date @property def abs_related_files(self) -> List[str]: return self._data.get(ABS_RELATED_FILES, None) @abs_related_files.setter def abs_related_files(self, value: List[str]): if value is not None and not isinstance(value, list): raise ValidationError("Value must be a list") if value is not None: self._data[ABS_RELATED_FILES] = value @property def mta_files(self) -> List[str]: return self._data.get(MTA_FILES, None) @mta_files.setter def mta_files(self, value: List[str]): if value is not None and not isinstance(value, list): raise ValidationError("Value must be a list") if value is not None: self._data[MTA_FILES] = value @property def other_numbers(self) -> List[StrainId]: return self._data.get(OTHER_CULTURE_NUMBERS, None) @other_numbers.setter def other_numbers(self, value: List[StrainId]): for on in value: if not isinstance(on, StrainId): msg = "Other number must be a list of Strain Id instances" raise ValidationError(msg) self._data[OTHER_CULTURE_NUMBERS] = value @property def other_denominations(self) -> List[str]: return self._data.get(ACCESSION_NAME, None) @other_denominations.setter def other_denominations(self, value: List[str]): self._data[ACCESSION_NAME] = value @property def history(self) -> Union[List[str], None]: return self._data.get(HISTORY_OF_DEPOSIT) @history.setter def history(self, value: Union[str, None]): if value: value = [item.strip() for item in value.split("<")] value = list(filter(bool, value)) self._data[HISTORY_OF_DEPOSIT] = value @property def form_of_supply(self) -> List[str]: return self._data.get(FORM_OF_SUPPLY, None) @form_of_supply.setter def form_of_supply(self, value: List[str]): allowed = {f.lower() for f in ALLOWED_FORMS_OF_SUPPLY} if {v.lower() for v in value}.difference(allowed): msg = f"Not allowed forms of supply {value}: " msg += f"{', '.join(ALLOWED_FORMS_OF_SUPPLY)}" raise ValidationError(msg) self._data[FORM_OF_SUPPLY] = value @property def taxonomy(self) -> Taxonomy: return self._data.get(TAXONOMY, None) @taxonomy.setter def taxonomy(self, value: Taxonomy): self._data[TAXONOMY] = value @property def collect(self) -> Collect: return self._data.get(COLLECT, None) @collect.setter def collect(self, _collect: Collect): self._data[COLLECT] = _collect @property def deposit(self) -> Deposit: return self._data.get(DEPOSIT, None) @deposit.setter def deposit(self, _deposit: Deposit): self._data[DEPOSIT] = _deposit @property def isolation(self) -> Isolation: return self._data.get(ISOLATION, None) @isolation.setter def isolation(self, _isolation: Isolation): self._data[ISOLATION] = _isolation @property def growth(self) -> Growth: return self._data.get(GROWTH, None) @growth.setter def growth(self, _growth: Growth): self._data[GROWTH] = _growth @property def genetics(self) -> Genetics: return self._data.get(GENETICS, None) @genetics.setter def genetics(self, _genetics: Genetics): self._data[GENETICS] = _genetics @property def publications(self) -> Union[List[Publication], None]: return self._data.get(PUBLICATIONS, None) @publications.setter def publications(self, value: List[Publication]): if value is not None: error_msg = "Publications must be list Publication instances" if not isinstance(value, list): raise ValidationError(error_msg) for pub in value: if not isinstance(pub, Publication): raise ValidationError(error_msg) self._data[PUBLICATIONS] = value # mierder @property def pathogenicity(self) -> str: return self._data.get(PATHOGENICITY, None) @pathogenicity.setter def pathogenicity(self, value: str): self._data[PATHOGENICITY] = value @property def enzyme_production(self) -> str: return self._data.get(ENZYME_PRODUCTION, None) @enzyme_production.setter def enzyme_production(self, value: str): if value: self._data[ENZYME_PRODUCTION] = value @property def production_of_metabolites(self) -> str: return self._data.get(PRODUCTION_OF_METABOLITES, None) @production_of_metabolites.setter def production_of_metabolites(self, value: str): self._data[PRODUCTION_OF_METABOLITES] = value @property def remarks(self) -> str: return self._data.get(REMARKS, None) @remarks.setter def remarks(self, value: str): self._data[REMARKS] = value @property def applications(self) -> str: return self._data.get(APPLICATIONS, None) @applications.setter def applications(self, value: str): self._data[APPLICATIONS] = value @property def status(self) -> str: return self._data.get(STATUS, None) @status.setter def status(self, value: str): self._data[STATUS] = value @property def plant_pathogenicity_code(self) -> str: return self._data.get(PLANT_PATHOGENICITY_CODE, None) @plant_pathogenicity_code.setter def plant_pathogenicity_code(self, value: str): self._data[PLANT_PATHOGENICITY_CODE] = value @property def literature_linked_to_the_sequence_genome(self) -> str: return self._data.get(LITERATURE_LINKED_TO_SEQ_GENOME, None) @literature_linked_to_the_sequence_genome.setter def literature_linked_to_the_sequence_genome(self, value: str): self._data[LITERATURE_LINKED_TO_SEQ_GENOME] = value class StrainMirri(Strain): @property def record_id(self): return self._data.get('record_id', None) @record_id.setter def record_id(self, value: int): self._data['record_id'] = value @property def record_name(self): return self._data.get('record_name', None) @record_name.setter def record_name(self, value: int): self._data['record_name'] = value def add_taxon_to_strain(strain, value): value = value.strip() if not value: return if "*" in value or "×" in value: spps = re.split('\*|×', value) sp1 = spps[0] sp2 = f'{spps[0].split()[0]} {spps[1]}' spps = [sp1, sp2] else: spps = [v.strip() for v in value.split(';')] if len(spps) == 2: strain.taxonomy.hybrids = spps strain.taxonomy.interspecific_hybrid = True return value = spps[0] items = re.split(r" +", value) genus = items[0] strain.taxonomy.genus = genus if len(items) > 1: species = items[1] if species in ("sp", "spp", ".sp", "sp."): species = None return strain.taxonomy.species = species if len(items) > 2: rank = None name = None for index in range(0, len(items[2:]), 2): rank = SUBTAXAS.get(items[index + 2], None) if rank is None: raise ValidationError( f'The "Taxon Name" for strain with accession number {strain.id.collection} {strain.id.number} is not according to specification.' ) name = items[index + 3] strain.taxonomy.add_subtaxa(rank, name)