# files_process.py
"""Gradio app: extract yes/no screening questions from a surgical guideline
PDF, then answer each question from a patient chart PDF.

Tab 1 runs QuestionExtractionEngine on the guideline; Tab 2 runs
AnsweringEngine on the chart for every stored question.
"""

import os  # referenced by the (commented) server-config lines under __main__
from typing import Dict, List

import gradio as gr
import pandas as pd

from agents import AnsweringEngine, QuestionExtractionEngine


# ---- Gradio handlers ----
def extract_questions_handler(guideline_pdf, surgery_type, suggestions):
    """Extract yes/no questions from the uploaded guideline PDF.

    Args:
        guideline_pdf: value from gr.File (object exposing ``.name``) or a
            path-like; ``None`` if nothing was uploaded.
        surgery_type: free-text procedure name; falls back to "Procedure".
        suggestions: optional user guidance forwarded to the engine.

    Returns:
        Tuple ``(questions_list, questions_dataframe)`` — the raw list (kept
        in gr.State for the answering tab) and a numbered DataFrame for
        display.

    Raises:
        gr.Error: if no PDF was supplied or extraction produced no questions.
    """
    if guideline_pdf is None:
        raise gr.Error("Please upload a surgical guideline PDF (e.g., Knee Arthroplasty guideline).")
    # gr.File yields an object with .name; str() covers plain path inputs.
    pdf_path = guideline_pdf.name if hasattr(guideline_pdf, "name") else str(guideline_pdf)
    q_engine = QuestionExtractionEngine(pdf_path, surgery_type or "Procedure", user_suggestions=suggestions or "")
    questions: List[str] = q_engine.run()
    if not questions:
        raise gr.Error("I couldn't extract any questions. Try adding suggestions or another guideline PDF.")
    df = pd.DataFrame({"#": list(range(1, len(questions) + 1)), "Question": questions})
    return questions, df


def answer_questions_handler(patient_pdf, questions_state):
    """Answer every stored question against the uploaded patient chart PDF.

    Args:
        patient_pdf: value from gr.File (object exposing ``.name``) or a
            path-like; ``None`` if nothing was uploaded.
        questions_state: list of question strings from the extraction tab.

    Returns:
        DataFrame with columns ``Question | Answer | Rationale``.

    Raises:
        gr.Error: if no chart PDF was supplied or no questions exist yet.
    """
    if patient_pdf is None:
        raise gr.Error("Please upload a patient chart PDF.")
    if not questions_state:
        # FIX: original source had an unescaped newline inside this string
        # literal (a SyntaxError as written); rejoined into one message.
        raise gr.Error("No questions available. Extract questions first.")
    pdf_path = patient_pdf.name if hasattr(patient_pdf, "name") else str(patient_pdf)
    a_engine = AnsweringEngine(pdf_path)
    rows = []
    for q in questions_state:
        res: Dict[str, Dict[str, str]] = a_engine.answer_one(q)
        ans = res.get(q, {})  # engine keys its result dict by the question text
        rows.append({
            "Question": q,
            "Answer": ans.get("answer", "Not Found"),
            "Rationale": ans.get("rationale", "—"),
        })
    df = pd.DataFrame(rows, columns=["Question", "Answer", "Rationale"])
    return df


with gr.Blocks(title="Guideline → Questions → Chart Answers", theme="soft") as demo:
    gr.Markdown("# 🏥 Guideline Q&A (Yes/No) — with Iterative Feedback")
    with gr.Tabs():
        with gr.Tab("1) Extract Questions"):
            with gr.Row():
                guideline_pdf = gr.File(label="Upload Guideline PDF (e.g., Knee_Arthroplasty.pdf)", file_count="single")
                surgery_type = gr.Textbox(label="Surgery Type", value="Knee Arthroplasty")
            suggestions = gr.Textbox(
                label="Suggestions (optional)",
                placeholder="E.g., 'Focus on contraindications and pre-op lab thresholds; keep questions short.'"
            )
            extract_btn = gr.Button("Extract Yes/No Questions", variant="primary")
            questions_state = gr.State([])  # list[str]
            questions_df = gr.Dataframe(headers=["#", "Question"], interactive=False, wrap=True)
            gr.Markdown("If the questions need refinement, update 'Suggestions' and click the button again.")

            # FIX: dropped the redundant _extract_and_store pass-through
            # wrapper — the handler already returns (list, df) in the right
            # order for these outputs.
            extract_btn.click(
                fn=extract_questions_handler,
                inputs=[guideline_pdf, surgery_type, suggestions],
                outputs=[questions_state, questions_df],
            )

        with gr.Tab("2) Answer from Patient Chart"):
            patient_pdf = gr.File(label="Upload Patient Chart PDF (e.g., JAY_MORGAN.pdf)", file_count="single")
            answer_btn = gr.Button("Answer All Questions", variant="primary")
            answers_df = gr.Dataframe(headers=["Question", "Answer", "Rationale"], interactive=False, wrap=True)

            answer_btn.click(
                fn=answer_questions_handler,
                inputs=[patient_pdf, questions_state],
                outputs=answers_df,
            )

    # NOTE(review): `./embeddings//` has a double slash — looks like a
    # placeholder (e.g. a per-PDF subdirectory name) was lost; string kept
    # as-is since the intended path segment can't be recovered from here.
    gr.Markdown(
        "### Notes\n"
        "- Embeddings are cached under `./embeddings//` to avoid recomputation.\n"
        "- Set your OpenAI key: `export OPENAI_API_KEY=...` before running.\n"
        "- If a PDF is scanned/image-only, text extraction may be poor (consider OCR pre-processing).\n"
    )


if __name__ == "__main__":
    # Optional: set server params via env (Arch-friendly)
    # os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
    # os.environ.setdefault("GRADIO_SERVER_PORT", "7860")
    demo.launch()