# MCG-Assist-Question-Answering / files_process.py
# (Hugging Face Space file; author: abjasrees; commit bdd2e75, verified)
# files_process.py
import os
from typing import List, Dict
import gradio as gr
import pandas as pd
from agents import QuestionExtractionEngine, AnsweringEngine
# ---- Gradio handlers ----
def extract_questions_handler(guideline_pdf, surgery_type, suggestions):
    """Extract yes/no questions from an uploaded guideline PDF.

    Returns a tuple ``(questions_list, questions_dataframe)`` where the
    DataFrame has columns ``#`` (1-based index) and ``Question``.
    Raises ``gr.Error`` when no PDF is supplied or no questions are found.
    """
    if guideline_pdf is None:
        raise gr.Error("Please upload a surgical guideline PDF (e.g., Knee Arthroplasty guideline).")
    # Gradio file objects expose the temp path as .name; accept plain
    # path-like values as well.
    pdf_path = getattr(guideline_pdf, "name", str(guideline_pdf))
    engine = QuestionExtractionEngine(
        pdf_path,
        surgery_type or "Procedure",
        user_suggestions=suggestions or "",
    )
    extracted: List[str] = engine.run()
    if not extracted:
        raise gr.Error("I couldn't extract any questions. Try adding suggestions or another guideline PDF.")
    table = pd.DataFrame({
        "#": [i + 1 for i in range(len(extracted))],
        "Question": extracted,
    })
    return extracted, table
def answer_questions_handler(patient_pdf, questions_state):
    """Answer every stored question against a patient chart PDF.

    Returns a DataFrame with columns: Question | Answer | Rationale.
    Raises ``gr.Error`` when the chart PDF or the question list is missing.
    """
    if patient_pdf is None:
        raise gr.Error("Please upload a patient chart PDF.")
    if not questions_state:
        raise gr.Error("No questions available. Extract questions first.")
    # Gradio file objects expose the temp path as .name; accept plain
    # path-like values as well.
    chart_path = getattr(patient_pdf, "name", str(patient_pdf))
    engine = AnsweringEngine(chart_path)

    def _row(question: str) -> Dict[str, str]:
        # answer_one returns a mapping keyed by the question text; missing
        # entries fall back to the "Not Found" placeholders.
        result: Dict[str, str] = engine.answer_one(question).get(question, {})
        return {
            "Question": question,
            "Answer": result.get("answer", "Not Found"),
            "Rationale": result.get("rationale", "—"),
        }

    return pd.DataFrame(
        [_row(q) for q in questions_state],
        columns=["Question", "Answer", "Rationale"],
    )
# ---- UI definition ----
# Two-tab workflow: (1) extract yes/no questions from a guideline PDF into
# gr.State, (2) answer those stored questions against a patient chart PDF.
with gr.Blocks(title="Guideline → Questions → Chart Answers", theme="soft") as demo:
    gr.Markdown("# 🏥 Guideline Q&A (Yes/No) — with Iterative Feedback")
    with gr.Tabs():
        with gr.Tab("1) Extract Questions"):
            with gr.Row():
                guideline_pdf = gr.File(label="Upload Guideline PDF (e.g., Knee_Arthroplasty.pdf)", file_count="single")
                surgery_type = gr.Textbox(label="Surgery Type", value="Knee Arthroplasty")
            suggestions = gr.Textbox(
                label="Suggestions (optional)",
                placeholder="E.g., 'Focus on contraindications and pre-op lab thresholds; keep questions short.'"
            )
            extract_btn = gr.Button("Extract Yes/No Questions", variant="primary")
            questions_state = gr.State([])  # list[str] of extracted questions, shared with tab 2
            questions_df = gr.Dataframe(headers=["#", "Question"], interactive=False, wrap=True)
            gr.Markdown("If the questions need refinement, update 'Suggestions' and click the button again.")
            # The handler already returns (questions, df) in output order, so
            # it is wired directly — no pass-through wrapper function needed.
            extract_btn.click(
                fn=extract_questions_handler,
                inputs=[guideline_pdf, surgery_type, suggestions],
                outputs=[questions_state, questions_df],
            )
        with gr.Tab("2) Answer from Patient Chart"):
            patient_pdf = gr.File(label="Upload Patient Chart PDF (e.g., JAY_MORGAN.pdf)", file_count="single")
            answer_btn = gr.Button("Answer All Questions", variant="primary")
            answers_df = gr.Dataframe(headers=["Question", "Answer", "Rationale"], interactive=False, wrap=True)
            answer_btn.click(
                fn=answer_questions_handler,
                inputs=[patient_pdf, questions_state],
                outputs=answers_df,
            )
    gr.Markdown(
        "### Notes\n"
        "- Embeddings are cached under `./embeddings/<PDF-name>/` to avoid recomputation.\n"
        "- Set your OpenAI key: `export OPENAI_API_KEY=...` before running.\n"
        "- If a PDF is scanned/image-only, text extraction may be poor (consider OCR pre-processing).\n"
    )
if __name__ == "__main__":
    # Optional: set server params via env (Arch-friendly)
    # os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
    # os.environ.setdefault("GRADIO_SERVER_PORT", "7860")
    # Launch with Gradio defaults; host/port can be overridden via the
    # GRADIO_SERVER_* environment variables shown above.
    demo.launch()