FinalMultiAgent / app.py
shahidshaikh's picture
Update app.py
5de7c24 verified
import os, json, shutil
import pandas as pd
import gradio as gr
from agent import chat_with_agent
from tools import CSV_PATH, OUT_DIR, PDF_DIR, _embed, HEADERS
# Call tools._embed() once at import time. Any failure is reported on stdout
# (the message calls this an embedding pre-load) and the app keeps starting.
try:
    _embed()
except Exception as exc:
    print(f">>> WARNING: Embedding pre-load failed: {exc}")
# Custom stylesheet passed to demo.launch(css=CSS) at the bottom of this file.
# It defines a dark palette through CSS variables and styles the class names
# used by the layout and markup in this file: .hdr / .sub-hdr (header),
# .sidebar, .main-content, and .phase-orb / .phase-dot (phase tracker).
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Plus+Jakarta+Sans:wght@600;700&display=swap');
:root { --primary: #f8fafc; --secondary: #818cf8; --accent: #a78bfa; --bg: #0b0f19; --panel: #111827; --border: rgba(255,255,255,0.1); }
body, .gradio-container { font-family: 'Inter', sans-serif !important; background-color: var(--bg) !important; color: var(--primary) !important; }
.hdr { font-family: 'Plus Jakarta Sans', sans-serif !important; font-size: 2.25rem !important; font-weight: 700; color: white !important; letter-spacing: -0.04em; margin-bottom: 2px !important; }
.sub-hdr { font-size: 0.875rem !important; color: #94a3b8 !important; margin-bottom: 24px !important; }
.sidebar { background: var(--panel) !important; border-right: 1px solid var(--border) !important; padding: 24px !important; }
.main-content { background: var(--panel) !important; border-radius: 12px !important; border: 1px solid var(--border) !important; box-shadow: 0 4px 20px rgba(0,0,0,0.4) !important; }
button.primary { background: var(--secondary) !important; color: white !important; border-radius: 8px !important; font-weight: 600 !important; border: none !important; transition: all 0.2s; }
button.primary:hover { filter: brightness(1.1); transform: translateY(-1px); }
.phase-orb { display: flex; align-items: center; gap: 8px; font-size: 0.75rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: #475569; margin-bottom: 12px; }
.phase-orb.active { color: var(--secondary); }
.phase-dot { width: 8px; height: 8px; border-radius: 50%; background: #334155; }
.phase-orb.active .phase-dot { background: var(--secondary); box-shadow: 0 0 10px var(--secondary); }
input, textarea, .gr-box, .gr-form, label, span, p, .markdown-text, h1, h2, h3 { color: #e2e8f0 !important; }
input, textarea { border-radius: 8px !important; border: 1px solid #334155 !important; background: #1e293b !important; }
input:focus { border-color: var(--secondary) !important; box-shadow: 0 0 0 2px rgba(129, 140, 248, 0.2) !important; }
.gr-button-secondary { background: #1e293b !important; color: #cbd5e1 !important; border: 1px solid #334155 !important; }
.gr-button-secondary:hover { background: #334155 !important; }
"""
# Static title/subtitle markup rendered at the top of the sidebar.
HDR = '<div class="hdr">Digital Curator</div><div class="sub-hdr">Enterprise Agentic Research Platform</div>'

# Phase-tracker template. The {a1}..{a4} placeholders are filled by
# get_phase_html() with either "active" or "" (one per listed step).
PRG_TPL = (
    '<div style="display: flex; flex-direction: column; gap: 4px;">'
    '<div class="phase-orb {a1}"><div class="phase-dot"></div>1. Ingestion</div>'
    '<div class="phase-orb {a2}"><div class="phase-dot"></div>2. Algorithmic Coding</div>'
    '<div class="phase-orb {a3}"><div class="phase-dot"></div>3. Thematic Abstraction</div>'
    '<div class="phase-orb {a4}"><div class="phase-dot"></div>4. Report Narration</div>'
    '</div>'
)

# Suffix appended to the review mode name to locate a phase's JSON output
# (see _load_table).
PHASE_FILE = {
    "CODES": "_labels.json",
    "THEMES": "_themes.json",
    "PAJAIS": "_taxonomy.json",
}

# Phase that follows each reviewable phase (see submit_review).
NEXT_PHASE = {
    "CODES": "THEMES",
    "THEMES": "PAJAIS",
    "PAJAIS": "REPORT",
}
def get_phase_html(current_phase):
    """Render the phase tracker with the step for ``current_phase`` highlighted.

    THEMES and PAJAIS share the third step. A phase name that is not in the
    table highlights the first step.
    """
    slot_by_phase = {
        "INGEST": "a1",
        "CODES": "a2",
        "THEMES": "a3",
        "PAJAIS": "a3",
        "REPORT": "a4",
    }
    highlighted = slot_by_phase.get(current_phase, "a1")

    css_classes = {}
    for slot in ("a1", "a2", "a3", "a4"):
        css_classes[slot] = "active" if slot == highlighted else ""
    return PRG_TPL.format(**css_classes)
def init_analysis(query, csv_file, chat_hist):
    """Start a new analysis run from a topic query and/or an uploaded CSV.

    Args:
        query: Free-text research topic (may be empty when a CSV is uploaded).
        csv_file: Uploaded CSV from the file component, or None.
        chat_hist: Current chatbot history (list of role/content dicts) or None.

    Returns:
        A 4-tuple ``(query_box_text, chat_history, table, phase_html)``.
        Every exit path returns four values because the start button in this
        file wires four output components to this handler. On validation or
        CSV errors the message is returned as the first value, as before.
    """
    # Work on a copy so the caller's list is never mutated, and tolerate None.
    chat_hist = list(chat_hist or [])

    # 1. Query sanitization. Done before any cleanup so that a rejected
    #    request does not delete the previous run's outputs.
    query = (query or "").strip()
    if len(query) < 3 and not csv_file:
        return (
            "Please enter a valid research topic or upload a CSV.",
            chat_hist,
            pd.DataFrame(),
            get_phase_html("INGEST"),
        )

    # 2. Clear old outputs (the .gitkeep placeholder is preserved).
    if os.path.exists(OUT_DIR):
        for name in os.listdir(OUT_DIR):
            fp = os.path.join(OUT_DIR, name)
            if os.path.isfile(fp) and name != ".gitkeep":
                os.remove(fp)

    # 3. Detection and logic
    mode = "Hybrid" if (query and csv_file) else ("CSV" if csv_file else "Query")
    sources = {}
    total_papers = 0

    if csv_file:
        # Handle CSV primary load
        try:
            # Accept either an object exposing ``.name`` or a plain path string.
            csv_path = getattr(csv_file, "name", csv_file)
            df = pd.read_csv(csv_path)
            if len(df.columns) < len(HEADERS):
                df = df[HEADERS[:len(df.columns)]]
            else:
                df = df[HEADERS]
            df["Sr No"] = range(1, len(df) + 1)
            df["Source"] = "Uploaded CSV"
            df.to_csv(CSV_PATH, index=False)
            sources["CSV"] = len(df)
            total_papers = len(df)
        except Exception as e:
            return f"CSV Error: {e}", chat_hist, pd.DataFrame(), get_phase_html("INGEST")
    elif not os.path.exists(CSV_PATH):
        # Initialize empty CSV if query only (do not overwrite if it already exists)
        pd.DataFrame(columns=HEADERS + ["Source"]).to_csv(CSV_PATH, index=False)
    print(f"[MODE]: {mode}")

    # 4. Trigger the agent for Phase 1 (fetch/merge) and auto-advance.
    #    A specific command is sent to the agent to start the ingestion.
    trigger_msg = f"Start research on topic: '{query or 'AI and Societal Impact'}'. "
    if csv_file:
        trigger_msg += "I have uploaded a CSV dataset; merge new findings if needed. "
    trigger_msg += "Enforce MAX_TOTAL_PAPERS=120. Once papers are saved, proceed directly to Phase 2 (Clustering) using abstract mode."
    res = chat_with_agent(trigger_msg, "ux_session")

    # 5. Calculate final feedback
    if os.path.exists(CSV_PATH):
        df_final = pd.read_csv(CSV_PATH)
        total_papers = len(df_final)
        if "Source" in df_final.columns:
            counts = df_final["Source"].value_counts().to_dict()
            if counts:
                # The saved "Source" column already accounts for uploaded
                # rows, so it replaces the provisional "CSV" entry instead of
                # being listed next to it (which showed those rows twice).
                sources = counts
    print(f"[PAPERS]: {total_papers}")

    source_lines = "\n".join([f"- {s}: {c} papers" for s, c in sources.items()])
    feedback = (f"Detected Mode: {mode}\nSources:\n{source_lines}\nTotal: {total_papers} papers\n\n👉 Preparing dataset... Proceeding to Phase 2")
    new_hist = chat_hist + [
        {"role": "user", "content": f"Init {mode}"},
        {"role": "assistant", "content": feedback + "\n\n" + res},
    ]
    return "", new_hist, _load_table("abstract", "CODES"), get_phase_html("CODES")
def handle_pdf_upload(files):
    """Copy uploaded .pdf/.docx files into PDF_DIR and return a status line.

    Args:
        files: Uploaded file objects (or path strings) from the multi-file
            upload component; may be None or empty.

    Returns:
        A short status message. Files with other extensions are skipped and
        not counted.
    """
    if not files:
        return "No files selected."

    # The destination folder may not exist yet on a fresh deployment;
    # shutil.copy would otherwise fail on the first upload.
    os.makedirs(PDF_DIR, exist_ok=True)

    saved = []
    for f in files:
        # Accept either an object exposing ``.name`` or a plain path string.
        src = getattr(f, "name", f)
        ext = os.path.splitext(src)[1].lower()
        if ext in (".pdf", ".docx"):
            base = os.path.basename(src)
            shutil.copy(src, os.path.join(PDF_DIR, base))
            saved.append(base)
    return f"✅ {len(saved)} file(s) saved. Click '📑 Ingest Uploads' below or ask the agent."
def _paper_titles_by_cluster(data: dict) -> dict:
    """Map each cluster key in ``data`` to the titles of its papers.

    Titles are looked up in the CSV at CSV_PATH by "Sr No". An id with no
    matching title is rendered as ``"#<id>"``. Returns an empty dict when the
    CSV does not exist.

    Args:
        data: Mapping of cluster key -> dict that may carry a "paper_ids" list.
    """
    if not os.path.exists(CSV_PATH):
        return {}

    papers = pd.read_csv(CSV_PATH)
    title_map = {}
    if "Sr No" in papers.columns and "Title" in papers.columns:
        # Coerce instead of calling int() directly: blank or non-numeric
        # "Sr No" cells used to raise ValueError and abort the whole lookup.
        serials = pd.to_numeric(papers["Sr No"], errors="coerce")
        titles = papers["Title"].fillna("")
        title_map = {
            str(int(serial)): title
            for serial, title in zip(serials, titles)
            if pd.notna(serial)
        }

    return {
        key: [title_map.get(str(pid), f"#{pid}") for pid in (entry.get("paper_ids") or [])]
        for key, entry in data.items()
    }
def _load_table(mode: str, phase: str) -> pd.DataFrame:
    """Load the JSON output of ``phase`` for ``mode`` as a review table.

    The file is ``OUT_DIR/<mode><suffix>`` where the suffix comes from
    PHASE_FILE, falling back to ``_<phase lowercased>.json``.

    Returns:
        - CODES: one row per cluster, including paper titles plus the editable
          "Approve" / "Rename To" columns.
        - THEMES: the "themes" list as a frame.
        - anything else: Theme / Category / Confidence rows.
        An empty DataFrame is returned when the file is missing or cannot be
        read; the failure is logged instead of being silently discarded.
    """
    try:
        path = os.path.join(OUT_DIR, mode + PHASE_FILE.get(phase, f"_{phase.lower()}.json"))
        if not os.path.exists(path):
            return pd.DataFrame()

        # Context manager so the handle is closed even if parsing fails.
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)

        if phase == "CODES":
            cluster_papers = _paper_titles_by_cluster(data)
            rows = []
            for k, v in data.items():
                titles = cluster_papers.get(k, [])
                rows.append({
                    "ID": k,
                    "Label": v.get("label", ""),
                    "Confidence": v.get("confidence", ""),
                    "Reasoning": v.get("reasoning", ""),
                    "Paper Count": len(titles),
                    "Paper Titles": " | ".join(titles),
                    "Approve": "yes",
                    "Rename To": "",
                })
            return pd.DataFrame(rows)

        if phase == "THEMES":
            return pd.DataFrame(data.get("themes", []))

        items = data if isinstance(data, list) else list(data.values())
        return pd.DataFrame([
            {
                "Theme": v.get("name", ""),
                "Category": v.get("pajais_category", ""),
                "Confidence": v.get("confidence", ""),
            }
            for v in items
        ])
    except Exception as e:
        # Best-effort loader: the UI gets an empty table, but the reason is
        # kept visible in the logs (a bare ``except:`` also hid Ctrl-C).
        print(f">>> WARNING: Could not load {phase} table for mode {mode}: {e}")
        return pd.DataFrame()
def handle_chat(msg, hist, mode, ph):
    """Send ``msg`` to the agent and refresh the review table.

    Args:
        msg: User message forwarded to ``chat_with_agent``.
        hist: Current chat history (list of role/content dicts) or None.
        mode: Review mode used to reload the table.
        ph: Current phase used to reload the table.

    Returns:
        ``("", new_history, table)``. The empty string clears the message box.
        On failure the history gains an assistant "Error: ..." entry and the
        table is left unchanged via ``gr.update()``.
    """
    # Copy so the caller's list is not mutated in place, and tolerate None
    # (previously ``None + [...]`` raised inside the except handler itself).
    hist = list(hist or [])
    try:
        res = chat_with_agent(msg, "ux_session")
        hist = hist + [
            {"role": "user", "content": msg},
            {"role": "assistant", "content": res},
        ]
        return "", hist, _load_table(mode, ph)
    except Exception as e:
        return "", hist + [{"role": "assistant", "content": f"Error: {e}"}], gr.update()
def submit_review(df, hist, mode, ph):
    """Report the user's review of phase ``ph`` to the agent and advance.

    For the CODES phase, rows whose "Approve" is not "yes" and whose
    "Rename To" is filled in are sent to the agent as rename instructions.

    Args:
        df: Review table as edited by the user (may be None or empty).
        hist: Current chat history.
        mode: Review mode used to load the next table.
        ph: Phase that was just reviewed.

    Returns:
        ``(new_history, next_phase, next_table)``.
    """
    nxt = NEXT_PHASE.get(ph, ph)
    msg = f"User reviewed {ph}. Proceed to Phase {nxt}."

    has_review_columns = (
        df is not None
        and not df.empty
        and all(col in df.columns for col in ("ID", "Approve", "Rename To"))
    )
    if ph == "CODES" and has_review_columns:
        # Treat missing cells as empty text. A plain ``astype(str)`` turns NaN
        # into the literal "nan", which was then sent as a rename target.
        def _clean(value):
            return "" if pd.isna(value) else str(value).strip()

        approve = df["Approve"].map(_clean).str.lower()
        rename_to = df["Rename To"].map(_clean)
        rejected = (approve != "yes") & (rename_to != "")
        if rejected.any():
            changes = "; ".join(
                f'Cluster {cluster_id}: rename label to "{new_label}"'
                for cluster_id, new_label in zip(df.loc[rejected, "ID"], rename_to[rejected])
            )
            msg = (f"User reviewed {ph} and rejected some labels. Apply these changes: {changes}. Save and proceed to {nxt}.")

    _, new_hist, _ = handle_chat(msg, hist, mode, ph)
    return new_hist, nxt, _load_table(mode, nxt)
def _get_files():
    """Return the paths of the regular files currently in OUT_DIR.

    Returns an empty list when OUT_DIR does not exist. Sub-directories are
    skipped because the result feeds a file-download component.
    """
    if not os.path.isdir(OUT_DIR):
        return []
    candidates = (os.path.join(OUT_DIR, name) for name in os.listdir(OUT_DIR))
    return [path for path in candidates if os.path.isfile(path)]
def _get_report():
    """Return the narrative report as Markdown, or a placeholder.

    Looks in OUT_DIR for "narrative.txt" first, then
    "consolidated_report.txt", and returns the first one found.
    """
    for name in ("narrative.txt", "consolidated_report.txt"):
        path = os.path.join(OUT_DIR, name)
        if os.path.exists(path):
            # Context manager so the file handle is closed deterministically.
            with open(path, encoding="utf-8") as fh:
                return f"### 📄 Latest Narrative Report\n\n{fh.read()}"
    return "### 📄 Narrative Report\n(Not generated yet)"
def _show_chart(m, t):
    """Return an ``<iframe>`` embedding the saved chart ``OUT_DIR/<m>_<t>.html``.

    Args:
        m: Mode part of the file name.
        t: Chart type part of the file name.

    Returns:
        The iframe markup, or "Run Phase 2 first." when the file is missing.
    """
    from html import escape  # local import: only this function needs it

    path = os.path.join(OUT_DIR, f"{m}_{t}.html")
    if not os.path.exists(path):
        return "Run Phase 2 first."

    with open(path, encoding="utf-8") as fh:
        document = fh.read()

    # The document is placed inside an attribute value, so "&" must be escaped
    # as well as the quote. Escaping only '"' lets entities already present in
    # the chart HTML (e.g. "&lt;" inside a script) be decoded one level early.
    return f'<iframe srcdoc="{escape(document, quote=True)}" width="100%" height="450"></iframe>'
def _load_curation() -> pd.DataFrame:
    """Return the curation view of the CSV at CSV_PATH.

    Only the review columns are returned, in a fixed order. Any of them
    missing from the CSV is added as an empty-string column. An empty frame
    is returned when the CSV does not exist.
    """
    if not os.path.exists(CSV_PATH):
        return pd.DataFrame()

    review_columns = [
        "Sr No",
        "Title",
        "Web Link",
        "Research Type",
        "Research Type Confidence",
        "Research Type Reason",
        "Findings",
    ]
    table = pd.read_csv(CSV_PATH)
    absent = [name for name in review_columns if name not in table.columns]
    for name in absent:
        table[name] = ""
    return table[review_columns]
def _save_curation(updated_df: pd.DataFrame):
    """Write the user's classification overrides back into the CSV at CSV_PATH.

    Rows are matched on "Sr No". For each match, the four editable columns
    ("Research Type", its confidence and reason, and "Findings") are copied
    from ``updated_df`` into the first matching CSV row.

    Args:
        updated_df: Curation table as edited in the UI (may be None or empty).

    Returns:
        The result of ``gr.Info(...)`` on success or ``gr.Warning(...)`` when
        there is nothing to save.
    """
    if updated_df is None or updated_df.empty or not os.path.exists(CSV_PATH):
        return gr.Warning("No data to save.")

    full_df = pd.read_csv(CSV_PATH)
    if "Sr No" not in full_df.columns or "Sr No" not in updated_df.columns:
        return gr.Warning("No data to save.")

    editable = ["Research Type", "Research Type Confidence", "Research Type Reason", "Findings"]
    # Edited values may be text; make the target columns object-typed so that
    # writing a string into a numeric or all-NaN column is not a dtype clash.
    for col in editable:
        if col in full_df.columns:
            full_df[col] = full_df[col].astype(object)

    # Compare serial numbers numerically: the UI may hand back "3" or 3.0
    # where the CSV holds 3, and a plain == between those never matches.
    csv_serials = pd.to_numeric(full_df["Sr No"], errors="coerce")
    for _, row in updated_df.iterrows():
        serial = pd.to_numeric(row["Sr No"], errors="coerce")
        if pd.isna(serial):
            continue
        matches = full_df.index[csv_serials == serial]
        if matches.empty:
            continue
        for col in editable:
            full_df.loc[matches[0], col] = row.get(col, "")

    full_df.to_csv(CSV_PATH, index=False)
    return gr.Info("✅ Classification Overrides Saved!")
# Application layout: a sidebar (query, uploads, quick actions) next to a
# tabbed workspace, followed by the event bindings that need components from
# both sides.
# NOTE(review): the source this block was taken from had lost its indentation;
# the nesting below is reconstructed from the comments and component order.
# Confirm against the deployed layout.
with gr.Blocks() as demo:
    with gr.Row():
        # LEFT SIDEBAR
        with gr.Column(scale=1, elem_classes="sidebar"):
            gr.HTML(HDR)
            prg_display = gr.HTML(get_phase_html("INGEST"))
            gr.HTML('<div style="height: 24px; border-top: 1px solid #f1f5f9; margin-top: 24px;"></div>')
            input_box = gr.Textbox(placeholder="Enter research topic...", label="Research Query")
            start_btn = gr.Button("Execute Discovery", variant="primary")
            gr.HTML('<div style="height: 32px"></div>')
            with gr.Accordion("Artifact Uploads", open=False):
                csv_up = gr.File(label="Upload CSV Structure", file_types=[".csv"])
                pdf_up = gr.File(label="Upload Raw Papers (PDF/Docx)", file_types=[".pdf", ".docx"], file_count="multiple")
                pdf_status = gr.Markdown()
                pdf_up.upload(handle_pdf_upload, [pdf_up], [pdf_status])
            gr.HTML('<div style="height: 32px"></div>')
            gr.HTML('<div class="sub-hdr">Quick Actions</div>')
            btn_ingest = gr.Button("📑 Ingest Uploads", variant="secondary")
            btn_scratch = gr.Button("📂 Import Scratch", variant="secondary")
            btn_analyze = gr.Button("▶ Run Analysis", variant="secondary")
        # RIGHT MAIN WORKSPACE
        with gr.Column(scale=3, elem_classes="main-content"):
            with gr.Tabs(elem_classes="tab-nav"):
                with gr.Tab("💬 Agent Copilot"):
                    chatbot = gr.Chatbot([], height=500, label="Research Assistant")
                    with gr.Row():
                        msg_in = gr.Textbox(placeholder="Directly ask the agent to search, review, or summarize...", show_label=False, scale=5)
                        send_btn = gr.Button("Send", variant="primary", scale=1)
                with gr.Tab("🗂️ Data Curation"):
                    curation_df = gr.Dataframe(interactive=True, label="Research Classification Review")
                    gr.Markdown("<br>**💡 Editorial Note:** Review the Agent's reasoning. Modify 'Research Type' if you disagree, then save.")
                    with gr.Row():
                        refresh_cur_btn = gr.Button("🔄 Load Ingestions", variant="secondary")
                        save_cur_btn = gr.Button("💾 Save Overrides", variant="primary")
                    refresh_cur_btn.click(_load_curation, [], [curation_df])
                    save_cur_btn.click(_save_curation, [curation_df], [])
                with gr.Tab("📋 Research Nodes"):
                    # Phase currently under review; advanced by submit_review.
                    ph_state = gr.State("CODES")
                    mode_btn = gr.Radio(["abstract","title"], label="Review Mode", value="abstract")
                    tbl = gr.Dataframe(interactive=True)
                    gr.Markdown("<br>**💡 Editorial Note:** Reject a label by setting **Approve** = 'no' and filling **Rename To**.")
                    with gr.Row():
                        refresh_tbl_btn = gr.Button("🔄 Sync Data", variant="secondary")
                        sub_btn = gr.Button("Commit & Advance", variant="primary")
                    refresh_tbl_btn.click(_load_table, [mode_btn, ph_state], [tbl])
                with gr.Tab("📊 Abstraction Vectors"):
                    with gr.Row():
                        cm = gr.Dropdown(["abstract","title"], value="abstract", label="Mode", scale=1)
                        ct = gr.Dropdown(["intertopic","heatmap","dendrogram"], value="intertopic", label="Vector Chart Type", scale=2)
                    chart_out = gr.HTML()
                    # Either dropdown changing re-renders the chart.
                    cm.change(_show_chart, [cm, ct], [chart_out])
                    ct.change(_show_chart, [cm, ct], [chart_out])
                with gr.Tab("📄 Synthesis Report"):
                    btn_report = gr.Button("Generate Narrative", variant="primary")
                    report_box = gr.Markdown("### 📄 Narrative Report\n(Waiting for Phase 6 completion)")
                    btn_report.click(_get_report, [], [report_box])
                with gr.Tab("📥 Archival Exports"):
                    dl = gr.File(label="Artifacts", file_count="multiple", interactive=False)
                    btn_refresh_files = gr.Button("🔄 Sync Archive", variant="secondary")
                    btn_refresh_files.click(_get_files, [], [dl])
    # Event Bindings
    start_btn.click(init_analysis, [input_box, csv_up, chatbot], [input_box, chatbot, tbl, prg_display])
    send_btn.click(handle_chat, [msg_in, chatbot, mode_btn, ph_state], [msg_in, chatbot, tbl])
    # Quick actions reuse handle_chat with a canned message and fixed
    # mode/phase values supplied through constant State inputs.
    btn_ingest.click(handle_chat, [gr.State("Ingest all uploaded PDFs and Word documents into the system."), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    btn_scratch.click(handle_chat, [gr.State("Check the local scratch folder and import any papers found there."), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    btn_analyze.click(handle_chat, [gr.State("run abstract only"), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    # After a review is committed, refresh the report and redraw the phase
    # tracker from the updated phase state. Previously only the start button
    # wrote to the tracker, so it never moved past the coding step.
    sub_btn.click(
        submit_review, [tbl, chatbot, mode_btn, ph_state], [chatbot, ph_state, tbl]
    ).then(
        _get_report, [], [report_box]
    ).then(
        get_phase_html, [ph_state], [prg_display]
    )
if __name__ == "__main__":
    # Standard HF Space deployment settings: Soft theme with indigo/slate
    # hues and the Inter font stack.
    theme = gr.themes.Soft(
        primary_hue="indigo",
        neutral_hue="slate",
        font=["Inter", "ui-sans-serif", "system-ui"],
    )
    # Theme and custom stylesheet are supplied at launch time.
    demo.launch(theme=theme, css=CSS)