File size: 4,300 Bytes
b444f65
bdd2e75
 
 
 
 
 
 
 
 
 
 
 
b444f65
bdd2e75
b444f65
bdd2e75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b444f65
bdd2e75
b444f65
bdd2e75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# files_process.py
import os
from typing import List, Dict

import gradio as gr
import pandas as pd

from agents import QuestionExtractionEngine, AnsweringEngine


# ---- Gradio handlers ----

def extract_questions_handler(guideline_pdf, surgery_type, suggestions):
    """
    Extract Yes/No questions from an uploaded guideline PDF.

    Returns (questions_list, questions_dataframe)

    Args:
        guideline_pdf: The uploaded guideline. Either an object exposing the
            file path as ``.name`` or a value whose ``str()`` is the path.
        surgery_type: Procedure name handed to the extraction engine. ``None``,
            empty or whitespace-only values fall back to ``"Procedure"``.
        suggestions: Optional free-text hints forwarded to the engine as
            ``user_suggestions``. ``None``, empty or whitespace-only values
            become ``""``.

    Returns:
        A tuple ``(questions, df)`` where ``questions`` is the list returned by
        the engine and ``df`` is a DataFrame with a 1-based ``"#"`` column and
        a ``"Question"`` column.

    Raises:
        gr.Error: If no PDF was supplied or the engine produced no questions.
    """
    if guideline_pdf is None:
        raise gr.Error("Please upload a surgical guideline PDF (e.g., Knee Arthroplasty guideline).")

    pdf_path = guideline_pdf.name if hasattr(guideline_pdf, "name") else str(guideline_pdf)

    # Strip before applying the fallbacks: a whitespace-only textbox value is
    # truthy, so a bare `or` would pass "   " to the engine as the surgery type.
    surgery = (surgery_type or "").strip() or "Procedure"
    hints = (suggestions or "").strip()

    q_engine = QuestionExtractionEngine(pdf_path, surgery, user_suggestions=hints)
    questions: List[str] = q_engine.run()

    if not questions:
        raise gr.Error("I couldn't extract any questions. Try adding suggestions or another guideline PDF.")

    df = pd.DataFrame({"#": list(range(1, len(questions) + 1)), "Question": questions})
    return questions, df


def answer_questions_handler(patient_pdf, questions_state):
    """
    Answer every stored question against the uploaded patient chart.

    Returns a DataFrame: Question | Answer | Rationale

    Raises gr.Error when no chart PDF was uploaded or when there are no
    questions to answer.
    """
    if patient_pdf is None:
        raise gr.Error("Please upload a patient chart PDF.")
    if not questions_state:
        raise gr.Error("No questions available. Extract questions first.")

    # The upload may be an object carrying its path in `.name`, or a plain value.
    if hasattr(patient_pdf, "name"):
        chart_path = patient_pdf.name
    else:
        chart_path = str(patient_pdf)

    engine = AnsweringEngine(chart_path)

    def _row_for(question):
        # The engine's result is looked up by the question text itself; missing
        # entries or fields fall back to placeholder values.
        result: Dict[str, Dict[str, str]] = engine.answer_one(question)
        entry = result.get(question, {})
        return {
            "Question": question,
            "Answer": entry.get("answer", "Not Found"),
            "Rationale": entry.get("rationale", "—"),
        }

    records = [_row_for(question) for question in questions_state]
    return pd.DataFrame(records, columns=["Question", "Answer", "Rationale"])


# ---- UI layout ----
# Two tabs: tab 1 extracts questions from a guideline PDF, tab 2 answers those
# questions from a patient chart PDF. Components are created in display order
# inside the nested context managers, so statement order here matters.
with gr.Blocks(title="Guideline → Questions → Chart Answers", theme="soft") as demo:
    gr.Markdown("# 🏥 Guideline Q&A (Yes/No) — with Iterative Feedback")

    with gr.Tabs():
        with gr.Tab("1) Extract Questions"):
            # Guideline upload and surgery type sit side by side.
            with gr.Row():
                guideline_pdf = gr.File(label="Upload Guideline PDF (e.g., Knee_Arthroplasty.pdf)", file_count="single")
                surgery_type = gr.Textbox(label="Surgery Type", value="Knee Arthroplasty")
            suggestions = gr.Textbox(
                label="Suggestions (optional)",
                placeholder="E.g., 'Focus on contraindications and pre-op lab thresholds; keep questions short.'"
            )
            extract_btn = gr.Button("Extract Yes/No Questions", variant="primary")

            # Holds the extracted questions so the second tab's handler can read them.
            questions_state = gr.State([])  # list[str]
            # Read-only table showing the numbered questions.
            questions_df = gr.Dataframe(headers=["#", "Question"], interactive=False, wrap=True)
            gr.Markdown("If the questions need refinement, update 'Suggestions' and click the button again.")

            # Thin pass-through to extract_questions_handler; its two return
            # values map onto (questions_state, questions_df) below.
            def _extract_and_store(g_pdf, s_type, sugg):
                q_list, df = extract_questions_handler(g_pdf, s_type, sugg)
                return q_list, df

            extract_btn.click(
                fn=_extract_and_store,
                inputs=[guideline_pdf, surgery_type, suggestions],
                outputs=[questions_state, questions_df],
            )

        with gr.Tab("2) Answer from Patient Chart"):
            patient_pdf = gr.File(label="Upload Patient Chart PDF (e.g., JAY_MORGAN.pdf)", file_count="single")
            answer_btn = gr.Button("Answer All Questions", variant="primary")
            # Read-only results table; columns match the DataFrame built by
            # answer_questions_handler.
            answers_df = gr.Dataframe(headers=["Question", "Answer", "Rationale"], interactive=False, wrap=True)

            # Answers the questions stored by tab 1 against the uploaded chart.
            answer_btn.click(
                fn=answer_questions_handler,
                inputs=[patient_pdf, questions_state],
                outputs=answers_df,
            )

    # Usage notes rendered below the tabs.
    gr.Markdown(
        "### Notes\n"
        "- Embeddings are cached under `./embeddings/<PDF-name>/` to avoid recomputation.\n"
        "- Set your OpenAI key: `export OPENAI_API_KEY=...` before running.\n"
        "- If a PDF is scanned/image-only, text extraction may be poor (consider OCR pre-processing).\n"
    )

if __name__ == "__main__":
    # Launch the Gradio app only when this file is run as a script.
    # Optional: uncomment to set the server host/port via environment
    # variables before launching (values below are examples).
    # os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
    # os.environ.setdefault("GRADIO_SERVER_PORT", "7860")
    demo.launch()