You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

133 lines
4.0 KiB

import csv
"""
Required fields and corresponding types when inserting into the database.
"""
_required_fields = [
    # Each entry is (csv column name, type tag); the tag is interpreted by
    # _validate_type when a row is checked.
    ("metabolite_name", "str"),  # "str": the cell text is accepted as-is
    ("formula", "str"),
    ("mass", "float"),  # "float": the cell must parse as a floating-point number
    ("final_mz", "float"),
    ("final_rt", "float"),
    ("final_adduct", "str"),
    ("standard_grp", "str"),
    ("msms_detected", "yesno"),  # "yesno": the cell must be "Yes" or "No"
    ("inchikey", "str"),
]
"""
Optional fields and corresponding types when batch inserting into the database.
"""
_optional_fields = [
    # (csv column name, type tag) pairs; a column from this table may be
    # left out of the csv entirely.
    ("chemical_db_id", "str"),
    ("library", "str"),
    ("pubchem_cid", "int"),  # "int": the cell must parse as an integer
    ("pubmed_refcount", "int"),
    ("standard_class", "str"),
    ("inchikey14", "str"),
    ("adduct", "str"),
    ("detected_adducts", "str"),
    ("adduct_calc_mz", "str"),
    ("msms_purity", "float"),
]
"""
Fields of a query csv. Every field listed here is mandatory; the entries that are commented out are currently disabled.
"""
_query_fields = [
    # (csv column name, type tag) pairs describing one query row.
    ("rt_min", "float"),
    ("rt_max", "float"),
    ("mz_min", "float"),
    ("mz_max", "float"),
    # Disabled date-bound fields, kept for reference:
    # ("year_max", "int"),
    # ("day_max", "int"),
    # ("month_max", "int"),
]
def _validate_type(field: str, value: str, t):
    """Convert the raw csv cell *value* according to the type tag *t*.

    Args:
        field: Column name; used only to build error messages.
        value: Raw cell content. Normally a string, but ``csv.DictReader``
            supplies ``None`` for cells that a short row does not contain.
        t: One of the type tags ``"yesno"``, ``"int"``, ``"float"``, ``"str"``.

    Returns:
        ``True``/``False`` for ``"yesno"`` (compared case-insensitively after
        stripping whitespace), an ``int`` for ``"int"``, a ``float`` for
        ``"float"``, and *value* unchanged for ``"str"``.

    Raises:
        ValueError: If *value* cannot be converted to the requested type, or
            if *t* is not a known type tag.
    """
    if t == "yesno":
        # A non-string (e.g. None from a short row) has no .strip(); treat it
        # as an invalid value instead of letting AttributeError escape.
        normalized = value.strip().lower() if isinstance(value, str) else None
        if normalized == "yes":
            return True
        if normalized == "no":
            return False
        raise ValueError(
            f"Yes/No field {field} does not have a valid value {value}")

    if t == "int":
        try:
            return int(value)
        except (TypeError, ValueError) as err:
            # TypeError covers None; callers only handle ValueError.
            raise ValueError(
                f"Integer field {field} does not have a valid value {value}"
            ) from err

    if t == "float":
        try:
            return float(value)
        except (TypeError, ValueError) as err:
            # TypeError covers None; callers only handle ValueError.
            raise ValueError(
                f"Float field {field} does not have a valid value {value}"
            ) from err

    if t == "str":
        return value

    # Only reachable if a field table uses a tag this function does not know.
    raise ValueError("Impossible")
def validate_insertion_csv_fields(reader: csv.DictReader) -> tuple[list[dict], str]:
    """Validate and convert every row of an insertion csv.

    Each row must supply every column listed in ``_required_fields``; columns
    listed in ``_optional_fields`` are converted when they are present.
    Columns that appear in neither table are ignored.

    Args:
        reader: Iterable of row dicts (normally a ``csv.DictReader``) mapping
            column names to raw cell text.

    Returns:
        ``(chemicals, "")`` on success, where each chemical maps field names
        to converted values. On the first problem found, validation stops and
        ``([], message)`` is returned.
    """
    chemicals: list[dict] = []
    for row in reader:
        chemical = {}

        for field, t in _required_fields:
            if field not in row:
                return [], f'Required field "{field}" not present in csv'
            raw = row[field]
            # csv.DictReader fills the cells a short row lacks with None (its
            # default restval); a required field may not be absent that way.
            if raw is None:
                return [], f'Required field "{field}" is missing a value in csv'
            try:
                chemical[field] = _validate_type(field, raw, t)
            except ValueError as e:
                return [], str(e)

        for field, t in _optional_fields:
            raw = row.get(field)
            # Absent column, or a cell missing from a short row: not provided.
            if raw is None:
                continue
            try:
                chemical[field] = _validate_type(field, raw, t)
            except ValueError as e:
                return [], str(e)

        chemicals.append(chemical)
    return chemicals, ""
def validate_query_csv_fields(reader: csv.DictReader) -> tuple[list[dict], str]:
    """Validate and convert every row of a query csv.

    Each row must supply every column listed in ``_query_fields``.

    Args:
        reader: Iterable of row dicts (normally a ``csv.DictReader``) mapping
            column names to raw cell text.

    Returns:
        ``(queries, "")`` on success, where each query maps field names to
        converted values. On the first problem found, validation stops and
        ``([], message)`` is returned.
    """
    queries: list[dict] = []
    for row in reader:
        query = {}
        for field, t in _query_fields:
            if field not in row:
                return [], f'Required field "{field}" not present in csv'
            raw = row[field]
            # csv.DictReader fills the cells a short row lacks with None (its
            # default restval); report that instead of crashing on conversion.
            if raw is None:
                return [], f'Required field "{field}" is missing a value in csv'
            try:
                query[field] = _validate_type(field, raw, t)
            except ValueError as e:
                return [], str(e)
        # Disabled date handling, kept alongside the commented-out date
        # entries in _query_fields:
        # year_max, month_max, day_max = query.get(
        #     'year_max'), query.get('month_max'), query.get('day_max')
        # try:
        #     d = date(year_max, month_max, day_max)
        #     query["date"] = d
        # except ValueError as e:
        #     return [], f"Invalid Date Value Provided for {month_max}/{day_max}/{year_max}"
        queries.append(query)
    return queries, ""