|
|
import time
from pathlib import Path

import requests
import streamlit as st
|
|
|
|
|
|
# Base URL of the backend (FastAPI-style) service this UI talks to.
API_URL = "http://localhost:8000"

# NOTE(review): cwd-relative — assumes the app is always launched from the
# project root; confirm, or switch to a path anchored on this file.
BASE_DIR = Path.cwd()

# Directory where the backend-saved PDFs are expected to appear.
QUERY_DIR = BASE_DIR / "queries"

# Make sure the directory exists before any upload/save happens.
QUERY_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Initialise per-session state on first run.
#   saved_docs  - backend paths of PDFs stored during this session.
#   saved_once  - de-duplication keys so Streamlit reruns do not re-save
#                 the same upload.
st.session_state.setdefault("saved_docs", [])
st.session_state.setdefault("saved_once", set())

# Local alias: mutations through this name are visible in session state,
# since it is the very same list object.
saved_docs = st.session_state.saved_docs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
st.set_page_config(page_title="Exam Pipeline", layout="wide")
# Original title emoji was mojibake ("π"); restored to a sensible glyph.
st.title("📄 Exam Question Processing Pipeline")

# Ask for the Groq API key at runtime instead of shipping a hard-coded
# placeholder: the old literal ("your_groq_api_key_here") was always truthy,
# which defeated the "API key required" guard before running the pipeline.
# The field is masked in the UI.
api_key = st.text_input("Groq API Key", type="password")
|
|
|
|
|
|
# How many separate documents the user wants to feed into the pipeline
# (integer spinner, at least 1).
num_docs = st.number_input(label="Number of documents", step=1, min_value=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _save_via_api(endpoint, files, unique_key):
    """POST uploaded file(s) to the backend and record the returned PDF path.

    Shared tail of the Images/PDF branches below (previously duplicated).
    On any HTTP or application-level error the Streamlit run is stopped.
    Side effects: appends the path to ``saved_docs`` and marks ``unique_key``
    as handled in ``st.session_state.saved_once``.
    """
    res = requests.post(f"{API_URL}/{endpoint}", files=files)
    if res.status_code != 200:
        st.error(res.text)
        st.stop()

    data = res.json()
    if "error" in data:
        st.error(data["error"])
        st.stop()

    pdf_path = data["path"]
    saved_docs.append(pdf_path)
    st.session_state.saved_once.add(unique_key)
    # Success marker was mojibake ("Saved β ...") in the original source.
    st.success(f"Saved ✅ {pdf_path}")


# One upload widget group per requested document.
for i in range(num_docs):
    st.subheader(f"Document {i+1}")

    doc_type = st.radio(
        "Input type",
        ["Images", "PDF"],
        key=f"type_{i}"
    )

    if doc_type == "Images":
        uploaded_images = st.file_uploader(
            "Upload images",
            type=["png", "jpg", "jpeg"],
            accept_multiple_files=True,
            key=f"img_{i}"
        )

        # Images are only sent once the user explicitly clicks the button.
        if uploaded_images and st.button(f"Save Images as PDF (Doc {i+1})"):
            # Key includes the file names so a different selection for the
            # same slot is saved again.
            unique_key = f"img_{i}_{','.join(img.name for img in uploaded_images)}"

            if unique_key not in st.session_state.saved_once:
                _save_via_api(
                    "images-to-pdf",
                    [
                        ("files", (img.name, img.getvalue(), img.type))
                        for img in uploaded_images
                    ],
                    unique_key,
                )
            else:
                st.info("Images already saved for this document.")

    else:
        uploaded_pdf = st.file_uploader(
            "Upload PDF",
            type=["pdf"],
            key=f"pdf_{i}"
        )

        # NOTE(review): unlike the Images branch, a PDF is saved as soon as
        # it is uploaded (no button) — confirm this asymmetry is intended.
        if uploaded_pdf:
            unique_key = f"pdf_{i}_{uploaded_pdf.name}"

            if unique_key not in st.session_state.saved_once:
                _save_via_api(
                    "save-pdf",
                    {
                        "file": (
                            uploaded_pdf.name,
                            uploaded_pdf.getvalue(),
                            "application/pdf",
                        )
                    },
                    unique_key,
                )
            else:
                st.info("PDF already saved for this document.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# (The duplicated mid-file `import time` statements were removed; `time` is
# imported once at the top of the file.)

# Mojibake fixed throughout this section: the original contained "π"/"β"
# markers, and two string literals were split across physical lines
# (a Python syntax error) where a multi-byte emoji had been corrupted.
if st.button("🚀 Run Full Pipeline"):

    # Preconditions: an API key and at least one saved document.
    if not api_key:
        st.error("❌ API key required")
        st.stop()

    if not saved_docs:
        st.error("❌ Please upload images or PDFs first")
        st.stop()

    # Kick off the backend job. NOTE(review): the key travels as a query
    # parameter, which proxies/servers may log — confirm whether the backend
    # accepts it in a header or the JSON body instead.
    start_res = requests.post(
        f"{API_URL}/run-pipeline",
        params={"api_key": api_key},
        json=saved_docs
    )

    if start_res.status_code != 200:
        st.error(start_res.text)
        st.stop()

    start_data = start_res.json()
    job_id = start_data.get("job_id")

    if not job_id:
        st.error("❌ Failed to start pipeline")
        st.stop()

    st.subheader("⏳ Pipeline Progress")
    status_box = st.empty()

    final_result = None

    # Poll the job status once per second until the backend reports
    # completion or failure. NOTE(review): no upper bound on the wait —
    # consider a timeout if jobs can hang.
    while True:
        status_res = requests.get(f"{API_URL}/job-status/{job_id}")

        if status_res.status_code != 200:
            st.error(status_res.text)
            st.stop()

        status_data = status_res.json()
        status_text = status_data.get("status", "Unknown status")

        status_box.info(status_text)

        # Backend failure marker — TODO confirm: the original source was
        # mojibake-corrupted here; these literals must match exactly what
        # the backend emits in its status strings.
        if status_text.startswith("❌"):
            st.error(status_text)
            st.stop()

        if status_text == "✅ Completed":
            final_result = status_data.get("result")
            break

        time.sleep(1)

    if not final_result:
        st.error("Pipeline finished but no result returned")
        st.stop()

    st.success("✅ Pipeline completed successfully!")

    # Paths (on this machine's filesystem) produced by the backend job.
    final_pdf = final_result["final_pdf"]
    freq_json = final_result["frequency_json"]

    with open(final_pdf, "rb") as f:
        st.download_button(
            "📥 Download Final PDF",
            f,
            file_name="Exam_Frequency_Report.pdf"
        )

    with open(freq_json, "rb") as f:
        st.download_button(
            "📥 Download Frequency JSON",
            f,
            file_name="output_frequency.json"
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Subheader emojis below were mojibake ("π") in the original source.
# Every PDF currently on disk under queries/ (persists across sessions);
# sorted so the listing is deterministic.
st.subheader("📂 PDFs available in queries/")
st.write(sorted(str(p) for p in QUERY_DIR.glob("*.pdf")))

# Only what was uploaded during the current browser session.
st.subheader("📄 Recently Uploaded PDFs (This Session Only)")
if saved_docs:
    st.write(saved_docs)
else:
    st.info("No documents uploaded in this session.")
|
|
|
|