tccm-app / app.py
AnishSPIT's picture
Sync from GitHub via hub-sync
a7b7d8d verified
# app.py [WAIVER: none β€” well within 100-line ceiling]
import json
import tempfile
import gradio as gr
from prompts import THEORY_PROMPT, CHARACTERISTICS_PROMPT, METHOD_PROMPT
from models import EXTRACTION_MODELS, CONSOLIDATION_MODEL
from agents.tccm_agent import tccm_graph, GraphState
from agents.council_agent import council_graph, CouncilState
from utils.pdf_reader import load_papers, load_inventory
from utils.excel_writer import export_excel
PROMPT_MAP = {
"Theory": THEORY_PROMPT,
"Characteristics": CHARACTERISTICS_PROMPT,
"Method": METHOD_PROMPT,
}
MODEL_NAMES = list(
EXTRACTION_MODELS.keys()
) # ["Mistral-7B", "Zephyr-7B", "Qwen2.5-7B"]
# ── Core orchestration (UI glue β€” not agent logic) ───────────────────────────
def _extract_one(pdfs, inventory_file, journal, prompt_type, model_name):
papers = load_papers([p.name for p in pdfs]) if pdfs else []
inventory = load_inventory(inventory_file.name) if inventory_file else ""
result = tccm_graph.invoke(
GraphState(
papers=papers,
prompt_template=PROMPT_MAP[prompt_type],
model_id=EXTRACTION_MODELS[model_name],
journal=journal,
inventory=inventory,
results=[],
) # ty:ignore[invalid-argument-type]
)
return result["results"]
def run_council(pdfs, inventory_file, journal, prompt_type, progress=gr.Progress()):
progress(0.05, desc=f"Extracting with {MODEL_NAMES[0]}…")
s1 = _extract_one(pdfs, inventory_file, journal, prompt_type, MODEL_NAMES[0])
progress(0.38, desc=f"Extracting with {MODEL_NAMES[1]}…")
s2 = _extract_one(pdfs, inventory_file, journal, prompt_type, MODEL_NAMES[1])
progress(0.68, desc=f"Extracting with {MODEL_NAMES[2]}…")
s3 = _extract_one(pdfs, inventory_file, journal, prompt_type, MODEL_NAMES[2])
progress(0.85, desc=f"Consolidating with {CONSOLIDATION_MODEL}…")
s4 = council_graph.invoke(
CouncilState(sheet1=s1, sheet2=s2, sheet3=s3, consolidated=[]) # ty:ignore[invalid-argument-type]
)["consolidated"]
progress(0.95, desc="Writing Excel…")
with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False, prefix="tccm_") as f:
xlsx = export_excel(s1, s2, s3, s4, f.name)
progress(1.0, desc="Done βœ“")
return (
json.dumps(s1[:5], indent=2),
json.dumps(s2[:5], indent=2),
json.dumps(s3[:5], indent=2),
json.dumps(s4[:5], indent=2),
xlsx,
)
# ── Gradio UI ─────────────────────────────────────────────────────────────────
with gr.Blocks(title="Agentic TCCM Extractor β€” LLM Council") as demo:
gr.Markdown(
"# πŸ“š Agentic TCCM Extractor\n"
f"**Council:** {MODEL_NAMES[0]} Β· {MODEL_NAMES[1]} Β· {MODEL_NAMES[2]} β†’ "
f"**Consolidator:** `{CONSOLIDATION_MODEL}` Β· Set `HF_TOKEN` in Space secrets."
)
with gr.Row():
with gr.Column(scale=1):
pdfs = gr.File(
label="Paper PDFs", file_count="multiple", file_types=[".pdf"]
)
inventory_f = gr.File(
label="IS_Theories (.xlsx / .txt)", file_types=[".xlsx", ".txt", ".csv"]
)
journal = gr.Textbox(label="Journal Name", value="MIS Quarterly")
prompt_type = gr.Radio(
["Theory", "Characteristics", "Method"],
label="Extraction Prompt",
value="Theory",
)
run_btn = gr.Button("β–Ά Run Full LLM Council", variant="primary")
with gr.Column(scale=2):
with gr.Tabs():
with gr.Tab(f"Sheet 1 β€” {MODEL_NAMES[0]}"):
out1 = gr.JSON(label="Preview (first 5 rows)")
with gr.Tab(f"Sheet 2 β€” {MODEL_NAMES[1]}"):
out2 = gr.JSON(label="Preview (first 5 rows)")
with gr.Tab(f"Sheet 3 β€” {MODEL_NAMES[2]}"):
out3 = gr.JSON(label="Preview (first 5 rows)")
with gr.Tab("Sheet 4 β€” Consolidated"):
out4 = gr.JSON(label="Preview (first 5 rows)")
xlsx_out = gr.File(label="⬇ Download 4-Sheet Excel")
run_btn.click(
fn=run_council,
inputs=[pdfs, inventory_f, journal, prompt_type],
outputs=[out1, out2, out3, out4, xlsx_out],
)
if __name__ == "__main__":
demo.launch()