Browse Source

batch adding supported

master
Juni Kim 2 years ago
parent
commit
10765bcc8f
  1. 1
      .gitignore
  2. 0
      __init__.py
  3. 97
      app.py
  4. 10
      templates/admin.html
  5. 0
      templates/batch.html
  6. 21
      templates/batchadd.html
  7. 47
      templates/batchquery.html
  8. 118
      validate.py

1
.gitignore

@ -162,3 +162,4 @@ cython_debug/
#.idea/ #.idea/
/R /R
.swp .swp
/test

0
__init__.py

97
app.py

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from datetime import date from datetime import date
import os
from flask import Flask, render_template, session, request, abort, redirect, url_for, jsonify from flask import Flask, render_template, session, request, abort, redirect, url_for, jsonify
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import inspect, and_ from sqlalchemy import inspect, and_
@ -8,6 +9,9 @@ from flask_wtf import FlaskForm
import bcrypt import bcrypt
from wtforms_alchemy import model_form_factory from wtforms_alchemy import model_form_factory
from flask_migrate import Migrate from flask_migrate import Migrate
from uuid import uuid4
import csv
from validate import validate_insertion_csv_fields, validate_query_csv_fields
app = Flask(__name__) app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///project.db" app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///project.db"
@ -51,8 +55,10 @@ class Admin(db.Model):
@classmethod @classmethod
def authorize(cls): def authorize(cls):
if not session.get('admin'):
if "admin" not in session:
return redirect(url_for("admin_login")) return redirect(url_for("admin_login"))
else:
return None
def object_as_dict(obj): def object_as_dict(obj):
@ -153,7 +159,8 @@ def admin_login():
@app.route('/admin/logout', methods=['GET']) @app.route('/admin/logout', methods=['GET'])
def admin_logout(): def admin_logout():
session.pop('admin')
if "admin" in session:
session.pop('admin')
return redirect(url_for('home')) return redirect(url_for('home'))
@ -252,12 +259,12 @@ def search_api():
Chemical.final_mz > mz_min) Chemical.final_mz > mz_min)
rt_filter = and_(rt_max > Chemical.final_rt, rt_filter = and_(rt_max > Chemical.final_rt,
Chemical.final_rt > rt_min) Chemical.final_rt > rt_min)
date_filter = date(year_max, month_max, day_max) >= Chemical.createdAt
# date_filter = date(year_max, month_max, day_max) >= Chemical.createdAt
except ValueError as e: except ValueError as e:
return jsonify({"error": str(e)}), 400 return jsonify({"error": str(e)}), 400
result = Chemical.query.filter( result = Chemical.query.filter(
and_(mz_filter, rt_filter, date_filter)
and_(mz_filter, rt_filter)
).limit(20).all() ).limit(20).all()
data = [] data = []
@ -267,6 +274,88 @@ def search_api():
return jsonify(data) return jsonify(data)
# Utilities for doing add and search operations in batch.
# Uploaded CSVs are limited through Flask's MAX_CONTENT_LENGTH setting:
# 3 * 1000 * 1000 bytes, i.e. 3 MB (decimal). No larger file is allowed.
app.config['MAX_CONTENT_LENGTH'] = 3 * 1000 * 1000
@app.route("/chemical/batchadd", methods=["GET", "POST"])
def batch_add_request():
    """Render the batch-add form and, on POST, insert chemicals from a CSV.

    Only sessions with a truthy ``admin`` entry may use this view; everyone
    else gets a 403. On POST the uploaded ``csv`` file is written to a
    uniquely named temporary file under ``/tmp/walkerdb``, validated with
    ``validate_insertion_csv_fields`` and, if every row is valid, inserted
    in a single commit.

    Returns:
        The rendered ``batchadd.html`` template, with ``invalid`` set to an
        error message or ``success`` set to ``True`` after a POST.
    """
    if not session.get('admin'):
        abort(403)
    if request.method != "POST":
        return render_template("batchadd.html")
    if "csv" not in request.files or request.files["csv"].filename == '':
        return render_template("batchadd.html", invalid="Blank file included")

    # Write the upload to a temporary file so it can be reopened in text
    # mode for the csv module.
    upload = request.files["csv"]
    os.makedirs("/tmp/walkerdb", exist_ok=True)
    filename = os.path.join("/tmp/walkerdb", str(uuid4()))
    try:
        upload.save(filename)
        # newline="" lets the csv module handle embedded newlines itself.
        with open(filename, "r", newline="") as csvfile:
            reader = csv.DictReader(csvfile)
            # The validator consumes the reader fully and returns plain
            # dicts, so the file is not needed after this call.
            results, error = validate_insertion_csv_fields(reader)
    finally:
        # Remove the temporary file even when saving or parsing raises.
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass

    if error:
        return render_template("batchadd.html", invalid=error)

    chemicals = [Chemical(**result) for result in results]
    db.session.add_all(chemicals)
    db.session.commit()
    return render_template("batchadd.html", success=True)
@app.route("/chemical/batch", methods=["GET", "POST"])
def batch_query_request():
    """Render the batch-query form and, on POST, run every query in a CSV.

    Only sessions with a truthy ``admin`` entry may use this view; everyone
    else gets a 403. On POST the uploaded ``csv`` file is written to a
    uniquely named temporary file under ``/tmp/walkerdb`` and validated with
    ``validate_query_csv_fields``. Each valid row becomes one m/z and
    retention-time range search that returns at most five chemicals.

    Returns:
        The rendered ``batchquery.html`` template, with ``invalid`` set to
        an error message, or ``success=True`` and ``data`` (one
        ``{"query": ..., "hits": [...]}`` dict per CSV row) after a POST.
    """
    if not session.get('admin'):
        abort(403)
    if request.method != "POST":
        return render_template("batchquery.html")
    if "csv" not in request.files or request.files["csv"].filename == '':
        # Stay on the query page; this branch used to render the add page.
        return render_template("batchquery.html", invalid="Blank file included")

    # Write the upload to a temporary file so it can be reopened in text
    # mode for the csv module.
    upload = request.files["csv"]
    os.makedirs("/tmp/walkerdb", exist_ok=True)
    filename = os.path.join("/tmp/walkerdb", str(uuid4()))
    try:
        upload.save(filename)
        # newline="" lets the csv module handle embedded newlines itself.
        with open(filename, "r", newline="") as csvfile:
            reader = csv.DictReader(csvfile)
            # The validator consumes the reader fully and returns plain
            # dicts, so the file is not needed after this call.
            queries, error = validate_query_csv_fields(reader)
    finally:
        # Remove the temporary file even when saving or parsing raises.
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass

    if error:
        return render_template("batchquery.html", invalid=error)

    data = []
    for query in queries:
        # Both ranges are exclusive at either end.
        mz_filter = and_(query["mz_max"] > Chemical.final_mz,
                         Chemical.final_mz > query["mz_min"])
        rt_filter = and_(query["rt_max"] > Chemical.final_rt,
                         Chemical.final_rt > query["rt_min"])
        # date_filter = query["date"] >= Chemical.createdAt
        result = Chemical.query.filter(
            and_(mz_filter, rt_filter)
        ).limit(5).all()
        hits = [
            {
                "url": url_for("chemical_view", id=x.id),
                "name": x.name,
                "mz": x.final_mz,
                "rt": x.final_rt,
            }
            for x in result
        ]
        data.append(dict(
            query=query,
            hits=hits,
        ))
    return render_template("batchquery.html", success=True, data=data)
@app.route("/search") @app.route("/search")
def search(): def search():
return render_template("search.html") return render_template("search.html")

10
templates/admin.html

@ -12,6 +12,16 @@
Add a Chemical Add a Chemical
</button> </button>
</a> </a>
<a href="{{url_for('batch_add_request')}}">
<button>
Batch Add Chemicals
</button>
</a>
<a href="{{url_for('batch_query_request')}}">
<button>
Batch Search Chemicals
</button>
</a>
<h2>Admin Authentication</h2> <h2>Admin Authentication</h2>
<p> <p>
Since there is now an admin, only admins can create new admin accounts. You can do so through the <code>/admin/create</code> Since there is now an admin, only admins can create new admin accounts. You can do so through the <code>/admin/create</code>

0
templates/batch.html

21
templates/batchadd.html

@ -0,0 +1,21 @@
{% extends "base.html" %}
{% block content %}
{# Upload form for batch-inserting chemicals from a CSV file.
   Context: `invalid` (error message) and `success` (truthy after an insert). #}
<h2>Batch Upload Chemicals</h2>
<a href="https://git.junickim.me/junikimm717/walker-database/raw/master/validate.py"> Source Code with required type definitions </a>
<form method="post" enctype="multipart/form-data">
  {# The label's `for` must match the input's `id` to be associated with it. #}
  <label for="csv">CSV: </label>
  <input type="file" id="csv" name="csv">
  <input type="submit" value="Submit">
</form>
{% if invalid %}
<p style="color: red;">Data Points are Incorrectly added: {{invalid}}</p>
{% endif %}
{% if success %}
<p style="color: green;">Success!</p>
{% endif %}
{% endblock %}

47
templates/batchquery.html

@ -0,0 +1,47 @@
{% extends "base.html" %}
{% block content %}
{# Upload form and result listing for batch queries from a CSV file.
   Context: `invalid` (error message), `success` (truthy after a search) and
   `data`, a list of {query, hits} entries, one per CSV row. #}
<h2>Batch Query Chemicals</h2>
<a href="https://git.junickim.me/junikimm717/walker-database/raw/master/validate.py"> Source Code with required type definitions </a>
<form method="post" enctype="multipart/form-data">
  {# The label's `for` must match the input's `id` to be associated with it. #}
  <label for="csv">CSV: </label>
  <input type="file" id="csv" name="csv">
  <input type="submit" value="Submit">
</form>
{% if invalid %}
<p style="color: red;">Queries are incorrectly specified: {{invalid}}</p>
{% endif %}
{% if success %}
<p style="color: green;">Success!</p>
{% for result in data %}
<hr>
<h2>Query {{loop.index}}</h2>
<p>
  {{result.query.mz_min}} &lt; M/Z Ratio &lt; {{result.query.mz_max}},
  {{result.query.rt_min}} &lt; Retention Time &lt; {{result.query.rt_max}}
</p>
{% for hit in result.hits %}
<div>
  <a href="{{hit.url}}">
    <h3>{{hit.name}}</h3>
  </a>
  <table>
    <tr>
      <td>Retention Time</td>
      <td>{{hit.rt}}</td>
    </tr>
    <tr>
      <td>M/Z Ratio</td>
      <td>{{hit.mz}}</td>
    </tr>
  </table>
</div>
{% endfor %}
{% endfor %}
{% endif %}
{% endblock %}

118
validate.py

@ -0,0 +1,118 @@
import csv
from datetime import date
# Each table below is a list of (column name, type tag) pairs. The type tag
# ("str", "int", "float" or "yesno") is interpreted by _validate_type.

# Columns that every row of an insertion CSV must provide.
_required_fields = [
    ("name", "str"),
    ("formula", "str"),
    ("mass", "float"),
    ("final_mz", "float"),
    ("final_rt", "float"),
]
# Columns of an insertion CSV that are validated only when present.
_optional_fields = [
    ("chemical_db_id", "str"),
    ("library", "str"),
    ("pubchem_cid", "int"),
    ("pubmed_refcount", "int"),
    ("standard_class", "str"),
    ("inchikey", "str"),
    ("inchikey14", "str"),
    ("final_adduct", "str"),
    ("adduct", "str"),
    ("detected_adducts", "str"),
    # NOTE(review): the name suggests a numeric m/z, yet it is typed "str";
    # confirm against the Chemical model.
    ("adduct_calc_mz", "str"),
    ("msms_detected", "yesno"),
    ("msms_purity", "float"),
]
# Columns that every row of a batch-query CSV must provide.
_query_fields = [
    ("rt_min", "float"),
    ("rt_max", "float"),
    ("mz_min", "float"),
    ("mz_max", "float"),
    # Date bound columns, currently disabled:
    # ("year_max", "int"),
    # ("day_max", "int"),
    # ("month_max", "int"),
]
def _validate_type(field: str, value: str, t):
if t == "yesno":
l = value.strip().lower()
if l == "yes":
return True
elif l == "no":
return False
else:
raise ValueError(
f"Yes/No field {field} does not have a valid value {value}")
elif t == "int":
try:
return int(value)
except ValueError:
raise ValueError(
f"Integer field {field} does not have a valid value {value}")
elif t == "float":
try:
return float(value)
except ValueError:
raise ValueError(
f"Float field {field} does not have a valid value {value}")
elif t == "str":
return value
else:
raise ValueError("Impossible")
def validate_insertion_csv_fields(reader: csv.DictReader) -> tuple[list[dict], str]:
    """Validate every row of an insertion CSV and convert its cells.

    Args:
        reader: A ``csv.DictReader`` over the uploaded file.

    Returns:
        ``(chemicals, "")`` when all rows are valid, where each chemical is
        a dict of converted values keyed by column name. On the first
        invalid cell, ``([], message)`` with a non-empty error message.
    """
    chemicals: list[dict] = []
    for row in reader:
        chemical = {}
        for field, t in _required_fields:
            # DictReader fills the trailing columns of a short row with
            # None; treat that the same as the column being absent.
            if row.get(field) is None:
                return [], f"Required field \"{field}\" not present in csv"
            try:
                chemical[field] = _validate_type(field, row[field], t)
            except ValueError as e:
                return [], str(e)
        for field, t in _optional_fields:
            raw = row.get(field)
            if raw is None:
                # Column absent, or the row ended before reaching it.
                continue
            if t != "str" and not raw.strip():
                # A blank numeric or yes/no cell means "not provided"; it
                # must not reject the whole upload.
                continue
            try:
                chemical[field] = _validate_type(field, raw, t)
            except ValueError as e:
                return [], str(e)
        chemicals.append(chemical)
    return chemicals, ""
def validate_query_csv_fields(reader: csv.DictReader) -> tuple[list[dict], str]:
    """Validate every row of a batch-query CSV and convert its cells.

    Args:
        reader: A ``csv.DictReader`` over the uploaded file.

    Returns:
        ``(queries, "")`` when all rows are valid, where each query is a
        dict of converted values keyed by the names in ``_query_fields``.
        On the first invalid cell, ``([], message)`` with a non-empty error
        message.
    """
    queries: list[dict] = []
    for row in reader:
        query = {}
        for field, t in _query_fields:
            # DictReader fills the trailing columns of a short row with
            # None; treat that the same as the column being absent.
            if row.get(field) is None:
                return [], f"Required field \"{field}\" not present in csv"
            try:
                query[field] = _validate_type(field, row[field], t)
            except ValueError as e:
                return [], str(e)
        # NOTE: the date bound is disabled; the year_max/month_max/day_max
        # entries in _query_fields are commented out as well.
        # year_max, month_max, day_max = query.get(
        #     'year_max'), query.get('month_max'), query.get('day_max')
        # try:
        #     d = date(year_max, month_max, day_max)
        #     query["date"] = d
        # except ValueError as e:
        #     return [], f"Invalid Date Value Provided for {month_max}/{day_max}/{year_max}"
        queries.append(query)
    return queries, ""
Loading…
Cancel
Save