# MCG-Assist-Question-Answering / files_process.py
# (Hugging Face Space file; author: abjasrees; commit bdd2e75, verified)
# files_process.py
import os
from typing import List, Dict
import gradio as gr
import pandas as pd
from agents import QuestionExtractionEngine, AnsweringEngine
# ---- Gradio handlers ----
def extract_questions_handler(guideline_pdf, surgery_type, suggestions):
    """Extract yes/no questions from an uploaded guideline PDF.

    Returns a tuple ``(questions_list, questions_dataframe)`` where the
    DataFrame has columns ``#`` (1-based index) and ``Question``.
    Raises ``gr.Error`` when no PDF is supplied or no questions are found.
    """
    if guideline_pdf is None:
        raise gr.Error("Please upload a surgical guideline PDF (e.g., Knee Arthroplasty guideline).")
    # Gradio file objects expose the temp path as .name; accept plain
    # path-like values as well.
    pdf_path = getattr(guideline_pdf, "name", str(guideline_pdf))
    engine = QuestionExtractionEngine(
        pdf_path,
        surgery_type or "Procedure",
        user_suggestions=suggestions or "",
    )
    extracted: List[str] = engine.run()
    if not extracted:
        raise gr.Error("I couldn't extract any questions. Try adding suggestions or another guideline PDF.")
    table = pd.DataFrame({
        "#": [i + 1 for i in range(len(extracted))],
        "Question": extracted,
    })
    return extracted, table
def answer_questions_handler(patient_pdf, questions_state):
    """Answer every stored question against a patient chart PDF.

    Returns a DataFrame with columns: Question | Answer | Rationale.
    Raises ``gr.Error`` when the chart PDF or the question list is missing.
    """
    if patient_pdf is None:
        raise gr.Error("Please upload a patient chart PDF.")
    if not questions_state:
        raise gr.Error("No questions available. Extract questions first.")
    # Gradio file objects expose the temp path as .name; accept plain
    # path-like values as well.
    chart_path = getattr(patient_pdf, "name", str(patient_pdf))
    engine = AnsweringEngine(chart_path)

    def _row(question: str) -> Dict[str, str]:
        # answer_one returns a mapping keyed by the question text; missing
        # entries fall back to the "Not Found" placeholders.
        result: Dict[str, str] = engine.answer_one(question).get(question, {})
        return {
            "Question": question,
            "Answer": result.get("answer", "Not Found"),
            "Rationale": result.get("rationale", "—"),
        }

    return pd.DataFrame(
        [_row(q) for q in questions_state],
        columns=["Question", "Answer", "Rationale"],
    )
# ---- UI definition ----
# Two-tab workflow: (1) extract yes/no questions from a guideline PDF into
# gr.State, (2) answer those stored questions against a patient chart PDF.
with gr.Blocks(title="Guideline → Questions → Chart Answers", theme="soft") as demo:
    gr.Markdown("# 🏥 Guideline Q&A (Yes/No) — with Iterative Feedback")
    with gr.Tabs():
        with gr.Tab("1) Extract Questions"):
            with gr.Row():
                guideline_pdf = gr.File(label="Upload Guideline PDF (e.g., Knee_Arthroplasty.pdf)", file_count="single")
                surgery_type = gr.Textbox(label="Surgery Type", value="Knee Arthroplasty")
            suggestions = gr.Textbox(
                label="Suggestions (optional)",
                placeholder="E.g., 'Focus on contraindications and pre-op lab thresholds; keep questions short.'"
            )
            extract_btn = gr.Button("Extract Yes/No Questions", variant="primary")
            questions_state = gr.State([])  # list[str] of extracted questions, shared with tab 2
            questions_df = gr.Dataframe(headers=["#", "Question"], interactive=False, wrap=True)
            gr.Markdown("If the questions need refinement, update 'Suggestions' and click the button again.")
            # The handler already returns (questions, df) in output order, so
            # it is wired directly — no pass-through wrapper function needed.
            extract_btn.click(
                fn=extract_questions_handler,
                inputs=[guideline_pdf, surgery_type, suggestions],
                outputs=[questions_state, questions_df],
            )
        with gr.Tab("2) Answer from Patient Chart"):
            patient_pdf = gr.File(label="Upload Patient Chart PDF (e.g., JAY_MORGAN.pdf)", file_count="single")
            answer_btn = gr.Button("Answer All Questions", variant="primary")
            answers_df = gr.Dataframe(headers=["Question", "Answer", "Rationale"], interactive=False, wrap=True)
            answer_btn.click(
                fn=answer_questions_handler,
                inputs=[patient_pdf, questions_state],
                outputs=answers_df,
            )
    gr.Markdown(
        "### Notes\n"
        "- Embeddings are cached under `./embeddings/<PDF-name>/` to avoid recomputation.\n"
        "- Set your OpenAI key: `export OPENAI_API_KEY=...` before running.\n"
        "- If a PDF is scanned/image-only, text extraction may be poor (consider OCR pre-processing).\n"
    )
if __name__ == "__main__":
    # Optional: set server params via env (Arch-friendly)
    # os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
    # os.environ.setdefault("GRADIO_SERVER_PORT", "7860")
    # Launch with Gradio defaults; host/port can be overridden via the
    # GRADIO_SERVER_* environment variables shown above.
    demo.launch()