You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

118 lines
3.5 KiB

  1. import csv
  2. from datetime import date
  3. _required_fields = [
  4. ("name", "str"),
  5. ("formula", "str"),
  6. ("mass", "float"),
  7. ("final_mz", "float"),
  8. ("final_rt", "float"),
  9. ]
  10. _optional_fields = [
  11. ("chemical_db_id", "str"),
  12. ("library", "str"),
  13. ("pubchem_cid", "int"),
  14. ("pubmed_refcount", "int"),
  15. ("standard_class", "str"),
  16. ("inchikey", "str"),
  17. ("inchikey14", "str"),
  18. ("final_adduct", "str"),
  19. ("adduct", "str"),
  20. ("detected_adducts", "str"),
  21. ("adduct_calc_mz", "str"),
  22. ("msms_detected", "yesno"),
  23. ("msms_purity", "float"),
  24. ]
  25. _query_fields = [
  26. ("rt_min", "float"),
  27. ("rt_max", "float"),
  28. ("mz_min", "float"),
  29. ("mz_max", "float"),
  30. # ("year_max", "int"),
  31. # ("day_max", "int"),
  32. # ("month_max", "int"),
  33. ]
  34. def _validate_type(field: str, value: str, t):
  35. if t == "yesno":
  36. l = value.strip().lower()
  37. if l == "yes":
  38. return True
  39. elif l == "no":
  40. return False
  41. else:
  42. raise ValueError(
  43. f"Yes/No field {field} does not have a valid value {value}")
  44. elif t == "int":
  45. try:
  46. return int(value)
  47. except ValueError:
  48. raise ValueError(
  49. f"Integer field {field} does not have a valid value {value}")
  50. elif t == "float":
  51. try:
  52. return float(value)
  53. except ValueError:
  54. raise ValueError(
  55. f"Float field {field} does not have a valid value {value}")
  56. elif t == "str":
  57. return value
  58. else:
  59. raise ValueError("Impossible")
  60. def validate_insertion_csv_fields(reader: csv.DictReader) -> tuple[list[dict], str]:
  61. chemicals: list[dict] = []
  62. for row in reader:
  63. chemical = {}
  64. for field, t in _required_fields:
  65. if field not in row:
  66. return [], f"Required field \"{field}\" not present in csv"
  67. try:
  68. value = _validate_type(field, row[field], t)
  69. chemical[field] = value
  70. except ValueError as e:
  71. return [], str(e)
  72. for field, t in _optional_fields:
  73. if field not in row:
  74. continue
  75. try:
  76. value = _validate_type(field, row[field], t)
  77. chemical[field] = value
  78. except ValueError as e:
  79. return [], str(e)
  80. chemicals.append(chemical)
  81. return chemicals, ""
  82. def validate_query_csv_fields(reader: csv.DictReader) -> tuple[list[dict], str]:
  83. queries: list[dict] = []
  84. for row in reader:
  85. query = {}
  86. for field, t in _query_fields:
  87. if field not in row:
  88. return [], f"Required field \"{field}\" not present in csv"
  89. try:
  90. value = _validate_type(field, row[field], t)
  91. query[field] = value
  92. except ValueError as e:
  93. return [], str(e)
  94. # year_max, month_max, day_max = query.get(
  95. # 'year_max'), query.get('month_max'), query.get('day_max')
  96. # try:
  97. # d = date(year_max, month_max, day_max)
  98. # query["date"] = d
  99. # except ValueError as e:
  100. # return [], f"Invalid Date Value Provided for {month_max}/{day_max}/{year_max}"
  101. queries.append(query)
  102. return queries, ""