You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

136 lines
4.1 KiB

  1. import csv
  2. """
  3. Required fields when inserting into the database.
  4. """
  5. _required_fields = [
  6. # the "str" type means that this field can be any valid string.
  7. ("metabolite_name", "str"),
  8. ("formula", "str"),
  9. ("person_name", "str"),
  10. # any field labeled a "float" needs to have a value in decimal notation.
  11. ("mass", "float"),
  12. ("final_mz", "float"),
  13. ("final_rt", "float"),
  14. ("final_adduct", "str"),
  15. ("standard_grp", "str"),
  16. ("person_name", "str"),
  17. ("msms_detected", "yesno"), # Value can either be "Yes" or "No"
  18. ("inchikey", "str"),
  19. ]
  20. """
  21. Optional fields and corresponding types when batch inserting into the database.
  22. """
  23. _optional_fields = [
  24. ("chemical_db_id", "str"),
  25. ("library", "str"),
  26. ("pubchem_cid", "int"), # Only integers are permitted.
  27. ("pubmed_refcount", "int"),
  28. ("standard_class", "str"),
  29. ("inchikey14", "str"),
  30. ("adduct", "str"),
  31. ("detected_adducts", "str"),
  32. ("adduct_calc_mz", "str"),
  33. ("msms_purity", "float"),
  34. ]
  35. """
  36. All fields (excluding those that are commented out) are mandatory to include.
  37. """
  38. _query_fields = [
  39. ("rt_min", "float"),
  40. ("rt_max", "float"),
  41. ("mz_min", "float"),
  42. ("mz_max", "float"),
  43. # ("year_max", "int"),
  44. # ("day_max", "int"),
  45. # ("month_max", "int"),
  46. ]
  47. def _validate_type(field: str, value: str, t):
  48. if t == "yesno":
  49. l = value.strip().lower()
  50. if l == "yes":
  51. return True
  52. elif l == "no":
  53. return False
  54. else:
  55. raise ValueError(
  56. f"Yes/No field {field} does not have a valid value {value}")
  57. elif t == "int":
  58. try:
  59. return int(value)
  60. except ValueError:
  61. raise ValueError(
  62. f"Integer field {field} does not have a valid value {value}")
  63. elif t == "float":
  64. try:
  65. return float(value)
  66. except ValueError:
  67. raise ValueError(
  68. f"Float field {field} does not have a valid value {value}")
  69. elif t == "str":
  70. return value
  71. else:
  72. raise ValueError("Impossible")
  73. def validate_insertion_csv_fields(reader: csv.DictReader) -> tuple[list[dict], str]:
  74. chemicals: list[dict] = []
  75. for row in reader:
  76. chemical = {}
  77. print("row", row)
  78. for field, t in _required_fields:
  79. if field not in row:
  80. return [], f"Required field \"{field}\" not present in csv"
  81. try:
  82. value = _validate_type(field, row[field], t)
  83. chemical[field] = value
  84. except ValueError as e:
  85. return [], str(e)
  86. for field, t in _optional_fields:
  87. if field not in row:
  88. continue
  89. try:
  90. value = _validate_type(field, row[field], t)
  91. chemical[field] = value
  92. except ValueError as e:
  93. return [], str(e)
  94. chemicals.append(chemical)
  95. return chemicals, ""
  96. def validate_query_csv_fields(reader: csv.DictReader) -> tuple[list[dict], str]:
  97. queries: list[dict] = []
  98. for row in reader:
  99. query = {}
  100. for field, t in _query_fields:
  101. if field not in row:
  102. return [], f"Required field \"{field}\" not present in csv"
  103. try:
  104. value = _validate_type(field, row[field], t)
  105. query[field] = value
  106. except ValueError as e:
  107. return [], str(e)
  108. # year_max, month_max, day_max = query.get(
  109. # 'year_max'), query.get('month_max'), query.get('day_max')
  110. # try:
  111. # d = date(year_max, month_max, day_max)
  112. # query["date"] = d
  113. # except ValueError as e:
  114. # return [], f"Invalid Date Value Provided for {month_max}/{day_max}/{year_max}"
  115. queries.append(query)
  116. return queries, ""