Spaces:

shahidshaikh
/

FinalMultiAgent

Sleeping

File size: 16,685 Bytes

import os, json, shutil
import pandas as pd
import gradio as gr
from agent import chat_with_agent
from tools import CSV_PATH, OUT_DIR, PDF_DIR, _embed, HEADERS

# Warm up the embedding backend at import time. A failure here is only
# reported; the app still starts.
try:
    _embed()
except Exception as e:
    print(f">>> WARNING: Embedding pre-load failed: {e}")

# Custom dark-theme stylesheet handed to demo.launch(). It styles the classes
# used by the layout below (.hdr, .sub-hdr, .sidebar, .main-content,
# .phase-orb / .phase-dot) plus generic inputs and buttons.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Plus+Jakarta+Sans:wght@600;700&display=swap');
:root { --primary: #f8fafc; --secondary: #818cf8; --accent: #a78bfa; --bg: #0b0f19; --panel: #111827; --border: rgba(255,255,255,0.1); }
body, .gradio-container { font-family: 'Inter', sans-serif !important; background-color: var(--bg) !important; color: var(--primary) !important; }
.hdr { font-family: 'Plus Jakarta Sans', sans-serif !important; font-size: 2.25rem !important; font-weight: 700; color: white !important; letter-spacing: -0.04em; margin-bottom: 2px !important; }
.sub-hdr { font-size: 0.875rem !important; color: #94a3b8 !important; margin-bottom: 24px !important; }
.sidebar { background: var(--panel) !important; border-right: 1px solid var(--border) !important; padding: 24px !important; }
.main-content { background: var(--panel) !important; border-radius: 12px !important; border: 1px solid var(--border) !important; box-shadow: 0 4px 20px rgba(0,0,0,0.4) !important; }
button.primary { background: var(--secondary) !important; color: white !important; border-radius: 8px !important; font-weight: 600 !important; border: none !important; transition: all 0.2s; }
button.primary:hover { filter: brightness(1.1); transform: translateY(-1px); }
.phase-orb { display: flex; align-items: center; gap: 8px; font-size: 0.75rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: #475569; margin-bottom: 12px; }
.phase-orb.active { color: var(--secondary); }
.phase-dot { width: 8px; height: 8px; border-radius: 50%; background: #334155; }
.phase-orb.active .phase-dot { background: var(--secondary); box-shadow: 0 0 10px var(--secondary); }
input, textarea, .gr-box, .gr-form, label, span, p, .markdown-text, h1, h2, h3 { color: #e2e8f0 !important; }
input, textarea { border-radius: 8px !important; border: 1px solid #334155 !important; background: #1e293b !important; }
input:focus { border-color: var(--secondary) !important; box-shadow: 0 0 0 2px rgba(129, 140, 248, 0.2) !important; }
.gr-button-secondary { background: #1e293b !important; color: #cbd5e1 !important; border: 1px solid #334155 !important; }
.gr-button-secondary:hover { background: #334155 !important; }
"""
# Title and subtitle markup shown at the top of the sidebar.
HDR = (
    '<div class="hdr">Digital Curator</div>'
    '<div class="sub-hdr">Enterprise Agentic Research Platform</div>'
)

# Progress-tracker template. The {a1}..{a4} placeholders receive either
# "active" or "" so that exactly one step can be highlighted via CSS.
PRG_TPL = "".join([
    '<div style="display: flex; flex-direction: column; gap: 4px;">',
    '<div class="phase-orb {a1}"><div class="phase-dot"></div>1. Ingestion</div>',
    '<div class="phase-orb {a2}"><div class="phase-dot"></div>2. Algorithmic Coding</div>',
    '<div class="phase-orb {a3}"><div class="phase-dot"></div>3. Thematic Abstraction</div>',
    '<div class="phase-orb {a4}"><div class="phase-dot"></div>4. Report Narration</div>',
    '</div>',
])

# Suffix appended to the review mode to locate each phase's JSON output.
PHASE_FILE = dict(
    CODES="_labels.json",
    THEMES="_themes.json",
    PAJAIS="_taxonomy.json",
)
# Phase that follows each reviewable phase when the user commits a review.
NEXT_PHASE = dict(
    CODES="THEMES",
    THEMES="PAJAIS",
    PAJAIS="REPORT",
)

def get_phase_html(current_phase):
    """Return the progress-tracker HTML with one step marked active.

    Unknown phase names fall back to the first step. THEMES and PAJAIS
    both highlight the third step.
    """
    slot_for_phase = {"INGEST": "a1", "CODES": "a2", "THEMES": "a3", "PAJAIS": "a3", "REPORT": "a4"}
    highlighted = slot_for_phase.get(current_phase, "a1")
    css_classes = {}
    for slot in ("a1", "a2", "a3", "a4"):
        css_classes[slot] = "active" if slot == highlighted else ""
    return PRG_TPL.format(**css_classes)

def init_analysis(query, csv_file, chat_hist):
    """Validate the request, seed the paper CSV, and start the agent pipeline.

    Args:
        query: Research topic typed by the user; may be empty or None.
        csv_file: Uploaded CSV handle exposing a ``.name`` path, or a falsy
            value when nothing was uploaded.
        chat_hist: Current chat history (list of role/content dicts).

    Returns:
        A 4-tuple ``(query_box_text, chat_history, review_table, phase_html)``.
        Every exit path returns four values because the start button is bound
        to four output components.
    """
    chat_hist = chat_hist or []

    def _abort(message):
        # Early exits must still supply all four outputs; gr.update() leaves
        # the progress tracker as it was.
        return message, chat_hist, pd.DataFrame(), gr.update()

    # 1. Query sanitization. Done before anything is deleted so that an
    #    invalid request does not wipe the previous run's outputs.
    query = (query or "").strip()
    if len(query) < 3 and not csv_file:
        return _abort("Please enter a valid research topic or upload a CSV.")

    # 2. Clear old outputs (the .gitkeep placeholder is preserved)
    if os.path.exists(OUT_DIR):
        for entry in os.listdir(OUT_DIR):
            fp = os.path.join(OUT_DIR, entry)
            if os.path.isfile(fp) and entry != ".gitkeep":
                os.remove(fp)

    # 3. Detection and Logic
    mode = "Hybrid" if (query and csv_file) else ("CSV" if csv_file else "Query")
    sources = {}
    total_papers = 0

    # Handle CSV primary load
    if csv_file:
        try:
            df = pd.read_csv(csv_file.name)
            # Keep only the known header columns (a prefix of HEADERS when the
            # upload has fewer columns than HEADERS).
            df = df[HEADERS[:len(df.columns)]] if len(df.columns) < len(HEADERS) else df[HEADERS]
            df["Sr No"] = range(1, len(df) + 1)
            df["Source"] = "Uploaded CSV"
            df.to_csv(CSV_PATH, index=False)
            sources["CSV"] = len(df)
            total_papers = len(df)
        except Exception as e:
            return _abort(f"CSV Error: {e}")
    elif not os.path.exists(CSV_PATH):
        # Initialize empty CSV if query only (do not overwrite if it already exists)
        pd.DataFrame(columns=HEADERS + ["Source"]).to_csv(CSV_PATH, index=False)

    print(f"[MODE]: {mode}")

    # 4. Trigger Agent for Phase 1 (Fetch/Merge) and auto-advance
    # We send a specific command to the agent to start the ingestion
    trigger_msg = f"Start research on topic: '{query or 'AI and Societal Impact'}'. "
    if csv_file:
        trigger_msg += "I have uploaded a CSV dataset; merge new findings if needed. "
    trigger_msg += "Enforce MAX_TOTAL_PAPERS=120. Once papers are saved, proceed directly to Phase 2 (Clustering) using abstract mode."

    try:
        res = chat_with_agent(trigger_msg, "ux_session")
    except Exception as e:
        # Report agent failures in the chat, as handle_chat does, instead of
        # letting the exception escape the event handler.
        res = f"Error: {e}"

    # 5. Calculate Final Feedback and Visibility
    if os.path.exists(CSV_PATH):
        df_final = pd.read_csv(CSV_PATH)
        total_papers = len(df_final)
        # Detailed source breakdown from the "Source" column
        if "Source" in df_final.columns:
            counts = df_final["Source"].value_counts().to_dict()
            if counts:
                # The column already accounts for uploaded rows, so it replaces
                # the preliminary "CSV" entry rather than listing them twice.
                sources = counts

    print(f"[PAPERS]: {total_papers}")

    source_lines = "\n".join(f"- {s}: {c} papers" for s, c in sources.items())
    feedback = (f"Detected Mode: {mode}\nSources:\n{source_lines}\nTotal: {total_papers} papers\n\n👉 Preparing dataset... Proceeding to Phase 2")

    new_hist = chat_hist + [
        {"role": "user", "content": f"Init {mode}"},
        {"role": "assistant", "content": feedback + "\n\n" + res},
    ]
    return "", new_hist, _load_table("abstract", "CODES"), get_phase_html("CODES")

def handle_pdf_upload(files):
    """Copy uploaded .pdf/.docx files into PDF_DIR and report how many were kept.

    Args:
        files: Iterable of upload handles exposing a ``.name`` path, or a
            falsy value when nothing was selected.

    Returns:
        A status string for display in the sidebar.
    """
    if not files:
        return "No files selected."
    saved = []
    for f in files:
        base = os.path.basename(f.name)
        if os.path.splitext(base)[1].lower() not in (".pdf", ".docx"):
            continue  # silently skip unsupported file types
        # shutil.copy does not create missing directories, so make sure the
        # destination exists before the first copy.
        os.makedirs(PDF_DIR, exist_ok=True)
        shutil.copy(f.name, os.path.join(PDF_DIR, base))
        saved.append(base)
    return f"✅ {len(saved)} file(s) saved. Click '📑 Ingest Uploads' below or ask the agent."

def _paper_titles_by_cluster(data: dict) -> dict:
    """Map each cluster key to the titles of the papers listed in its ``paper_ids``.

    Titles are looked up by "Sr No" in the CSV at CSV_PATH. An id with no
    matching row is rendered as ``#<id>``. Returns an empty dict when the
    CSV does not exist.
    """
    if not os.path.exists(CSV_PATH):
        return {}

    papers = pd.read_csv(CSV_PATH)[["Sr No", "Title"]].fillna("")

    # Keys are stringified integers so they compare equal to str(pid) below.
    title_map = {}
    for _, row in papers.iterrows():
        title_map[str(int(row["Sr No"]))] = row["Title"]

    titles_by_cluster = {}
    for cluster_key, cluster in data.items():
        titles_by_cluster[cluster_key] = [
            title_map.get(str(pid), f"#{pid}") for pid in cluster.get("paper_ids", [])
        ]
    return titles_by_cluster

def _load_table(mode: str, phase: str) -> pd.DataFrame:
    """Load a phase's JSON output from OUT_DIR as a review table.

    Args:
        mode: Review mode; used as the file-name prefix.
        phase: Phase name. Its suffix comes from PHASE_FILE, falling back to
            ``_<phase lowercased>.json`` for phases not listed there.

    Returns:
        A DataFrame shaped for the phase, or an empty DataFrame when the file
        is missing or cannot be parsed (loading is best-effort so the UI
        never fails on a malformed artifact).
    """
    try:
        path = os.path.join(OUT_DIR, mode + PHASE_FILE.get(phase, f"_{phase.lower()}.json"))
        if not os.path.exists(path):
            return pd.DataFrame()
        # Context manager so the file handle is closed even if parsing fails.
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)

        if phase == "CODES":
            cluster_papers = _paper_titles_by_cluster(data)
            rows = []
            for k, v in data.items():
                titles = cluster_papers.get(k, [])
                rows.append({
                    "ID": k,
                    "Label": v.get("label", ""),
                    "Confidence": v.get("confidence", ""),
                    "Reasoning": v.get("reasoning", ""),
                    "Paper Count": len(titles),
                    "Paper Titles": " | ".join(titles),
                    # Editable review columns, pre-filled as "approved".
                    "Approve": "yes",
                    "Rename To": "",
                })
            return pd.DataFrame(rows)

        if phase == "THEMES":
            return pd.DataFrame(data.get("themes", []))

        # Remaining phases: accept either a list of entries or a dict of them.
        items = data if isinstance(data, list) else list(data.values())
        return pd.DataFrame([
            {
                "Theme": v.get("name", ""),
                "Category": v.get("pajais_category", ""),
                "Confidence": v.get("confidence", ""),
            }
            for v in items
        ])
    except Exception as exc:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate, and leave a trace of the failure.
        print(f">>> WARNING: Could not load {phase} table: {exc}")
        return pd.DataFrame()

def handle_chat(msg, hist, mode, ph):
    """Send one message to the agent and return the refreshed chat and table.

    Returns a 3-tuple: an empty string (used to clear the message box), the
    updated history, and the table for ``mode``/``ph``. If anything raises,
    the error text is appended as an assistant turn and the table is left
    unchanged via ``gr.update()``.
    """
    try:
        reply = chat_with_agent(msg, "ux_session")
        hist += [{"role":"user","content":msg}, {"role":"assistant","content":reply}]
        refreshed = _load_table(mode, ph)
    except Exception as e:
        failure = {"role":"assistant","content":f"Error: {e}"}
        return "", hist+[failure], gr.update()
    return "", hist, refreshed

def submit_review(df, hist, mode, ph):
    """Forward the user's review of phase ``ph`` to the agent and advance.

    For the CODES phase, rows whose "Approve" value is not "yes" and whose
    "Rename To" is non-blank are turned into rename instructions. Returns
    the new chat history, the next phase name, and that phase's table.
    """
    nxt = NEXT_PHASE.get(ph, ph)

    rename_requests = []
    if ph == "CODES" and df is not None and not df.empty and "Approve" in df.columns:
        not_approved = df["Approve"].astype(str).str.lower() != "yes"
        has_replacement = df["Rename To"].astype(str).str.strip() != ""
        for _, row in df[not_approved & has_replacement].iterrows():
            rename_requests.append(f'Cluster {row["ID"]}: rename label to "{row["Rename To"]}"')

    if rename_requests:
        changes = "; ".join(rename_requests)
        msg = f"User reviewed {ph} and rejected some labels. Apply these changes: {changes}. Save and proceed to {nxt}."
    else:
        msg = f"User reviewed {ph}. Proceed to Phase {nxt}."

    # Only the history from handle_chat is needed; the table is reloaded
    # for the next phase rather than the current one.
    new_hist = handle_chat(msg, hist, mode, ph)[1]
    return new_hist, nxt, _load_table(mode, nxt)

def _get_files():
    """Return the paths of the downloadable artifact files in OUT_DIR.

    Returns an empty list when OUT_DIR does not exist. Sub-directories and
    the ``.gitkeep`` placeholder are skipped because they are not artifacts.
    """
    if not os.path.isdir(OUT_DIR):
        return []
    candidates = (
        os.path.join(OUT_DIR, name)
        for name in sorted(os.listdir(OUT_DIR))
        if name != ".gitkeep"
    )
    return [path for path in candidates if os.path.isfile(path)]
def _get_report():
    """Return the narrative report as Markdown, or a placeholder if none exists.

    Looks for ``narrative.txt`` first, then ``consolidated_report.txt``, in
    OUT_DIR and uses the first one found.
    """
    for name in ("narrative.txt", "consolidated_report.txt"):
        candidate = os.path.join(OUT_DIR, name)
        if os.path.exists(candidate):
            # Context manager so the handle is closed after reading.
            with open(candidate, encoding="utf-8") as fh:
                return f"### 📄 Latest Narrative Report\n\n{fh.read()}"
    return "### 📄 Narrative Report\n(Not generated yet)"
def _show_chart(m, t):
    """Return an iframe embedding the saved chart ``<m>_<t>.html`` from OUT_DIR.

    Args:
        m: Mode prefix of the chart file name.
        t: Chart type part of the chart file name.

    Returns:
        An ``<iframe srcdoc=...>`` HTML string, or a hint message when the
        chart file does not exist yet.
    """
    import html  # local import: only this function needs it

    path = os.path.join(OUT_DIR, f"{m}_{t}.html")
    if not os.path.exists(path):
        return "Run Phase 2 first."
    with open(path, encoding="utf-8") as fh:
        document = fh.read()
    # The document goes inside a double-quoted attribute, so "&" has to be
    # escaped as well as quotes; otherwise entities in the chart HTML would
    # be decoded one level too early by the browser.
    return f'<iframe srcdoc="{html.escape(document, quote=True)}" width="100%" height="450"></iframe>'

def _load_curation() -> pd.DataFrame:
    """Load the curation view of the paper CSV.

    Returns a fixed set of columns from the CSV at CSV_PATH, adding any that
    are missing as empty strings, or an empty DataFrame if the CSV is absent.
    """
    if not os.path.exists(CSV_PATH):
        return pd.DataFrame()

    cols = ["Sr No", "Title", "Web Link", "Research Type", "Research Type Confidence", "Research Type Reason", "Findings"]
    df = pd.read_csv(CSV_PATH)
    missing = [name for name in cols if name not in df.columns]
    for name in missing:
        df[name] = ""
    return df[cols]

def _save_curation(updated_df: pd.DataFrame):
    """Write the user's classification edits back into the paper CSV.

    Rows are matched on "Sr No"; for each match the first matching CSV row
    receives the four editable fields. Shows a warning instead when the CSV
    is missing or the edited table is empty.
    """
    if not os.path.exists(CSV_PATH) or updated_df.empty:
        return gr.Warning("No data to save.")

    editable = ("Research Type", "Research Type Confidence", "Research Type Reason", "Findings")
    full_df = pd.read_csv(CSV_PATH)
    for _, edited in updated_df.iterrows():
        matches = full_df.index[full_df["Sr No"] == edited["Sr No"]]
        if matches.empty:
            continue
        target = matches[0]
        for field in editable:
            full_df.loc[target, field] = edited.get(field, "")
    full_df.to_csv(CSV_PATH, index=False)
    return gr.Info("✅ Classification Overrides Saved!")

# Build the UI: a sidebar (query, uploads, quick actions) next to a tabbed
# workspace (chat, curation, review table, charts, report, downloads).
# Event handlers that span both columns are wired at the bottom of the block.
with gr.Blocks() as demo:
    with gr.Row():
        # LEFT SIDEBAR
        with gr.Column(scale=1, elem_classes="sidebar"):
            gr.HTML(HDR)
            # Progress tracker; starts on the ingestion step and is replaced
            # by init_analysis when a run starts.
            prg_display = gr.HTML(get_phase_html("INGEST"))
            gr.HTML('<div style="height: 24px; border-top: 1px solid #f1f5f9; margin-top: 24px;"></div>')
            input_box = gr.Textbox(placeholder="Enter research topic...", label="Research Query")
            start_btn = gr.Button("Execute Discovery", variant="primary")
            
            gr.HTML('<div style="height: 32px"></div>')
            
            with gr.Accordion("Artifact Uploads", open=False):
                csv_up = gr.File(label="Upload CSV Structure", file_types=[".csv"])
                pdf_up = gr.File(label="Upload Raw Papers (PDF/Docx)", file_types=[".pdf", ".docx"], file_count="multiple")
                pdf_status = gr.Markdown()
                # Uploaded papers are copied into PDF_DIR as soon as they arrive.
                pdf_up.upload(handle_pdf_upload, [pdf_up], [pdf_status])
            
            gr.HTML('<div style="height: 32px"></div>')
            gr.HTML('<div class="sub-hdr">Quick Actions</div>')
            btn_ingest = gr.Button("📑 Ingest Uploads", variant="secondary")
            btn_scratch = gr.Button("📂 Import Scratch", variant="secondary")
            btn_analyze = gr.Button("▶ Run Analysis", variant="secondary")
            
        # RIGHT MAIN WORKSPACE
        with gr.Column(scale=3, elem_classes="main-content"):
            with gr.Tabs(elem_classes="tab-nav"):
                with gr.Tab("💬 Agent Copilot"):
                    chatbot = gr.Chatbot([], height=500, label="Research Assistant")
                    with gr.Row():
                        msg_in = gr.Textbox(placeholder="Directly ask the agent to search, review, or summarize...", show_label=False, scale=5)
                        send_btn = gr.Button("Send", variant="primary", scale=1)
                        
                with gr.Tab("🗂️ Data Curation"):
                    curation_df = gr.Dataframe(interactive=True, label="Research Classification Review")
                    gr.Markdown("<br>**💡 Editorial Note:** Review the Agent's reasoning. Modify 'Research Type' if you disagree, then save.")
                    with gr.Row():
                        refresh_cur_btn = gr.Button("🔄 Load Ingestions", variant="secondary")
                        save_cur_btn = gr.Button("💾 Save Overrides", variant="primary")
                    # Load reads the paper CSV; Save writes the edited fields back to it.
                    refresh_cur_btn.click(_load_curation, [], [curation_df])
                    save_cur_btn.click(_save_curation, [curation_df], [])
                        
                with gr.Tab("📋 Research Nodes"):
                    # Phase currently under review; advanced by submit_review
                    # through the sub_btn binding at the bottom of this block.
                    ph_state = gr.State("CODES")
                    mode_btn = gr.Radio(["abstract","title"], label="Review Mode", value="abstract")
                    tbl = gr.Dataframe(interactive=True)
                    gr.Markdown("<br>**💡 Editorial Note:** Reject a label by setting **Approve** = 'no' and filling **Rename To**.")
                    with gr.Row():
                        refresh_tbl_btn = gr.Button("🔄 Sync Data", variant="secondary")
                        sub_btn = gr.Button("Commit & Advance", variant="primary")
                    refresh_tbl_btn.click(_load_table, [mode_btn, ph_state], [tbl])
                    
                with gr.Tab("📊 Abstraction Vectors"):
                    with gr.Row():
                        cm = gr.Dropdown(["abstract","title"], value="abstract", label="Mode", scale=1)
                        ct = gr.Dropdown(["intertopic","heatmap","dendrogram"], value="intertopic", label="Vector Chart Type", scale=2)
                    chart_out = gr.HTML()
                    # The chart is rendered only when one of the dropdowns changes.
                    cm.change(_show_chart, [cm, ct], [chart_out]); ct.change(_show_chart, [cm, ct], [chart_out])
                    
                with gr.Tab("📄 Synthesis Report"):
                    btn_report = gr.Button("Generate Narrative", variant="primary")
                    report_box = gr.Markdown("### 📄 Narrative Report\n(Waiting for Phase 6 completion)")
                    # _get_report only reads an existing report file from OUT_DIR;
                    # it does not itself invoke the agent.
                    btn_report.click(_get_report, [], [report_box])
                
                with gr.Tab("📥 Archival Exports"):
                    dl = gr.File(label="Artifacts", file_count="multiple", interactive=False)
                    btn_refresh_files = gr.Button("🔄 Sync Archive", variant="secondary")
                    btn_refresh_files.click(_get_files, [], [dl])

    # Event Bindings
    # init_analysis feeds four outputs: query box, chat, review table, progress tracker.
    start_btn.click(init_analysis, [input_box, csv_up, chatbot], [input_box, chatbot, tbl, prg_display])
    # handle_chat returns "" as its first value, which clears msg_in.
    send_btn.click(handle_chat, [msg_in, chatbot, mode_btn, ph_state], [msg_in, chatbot, tbl])
    # Quick actions reuse handle_chat with a fixed prompt, mode "abstract" and
    # phase "CODES"; the constants are wrapped in gr.State to serve as inputs.
    btn_ingest.click(handle_chat, [gr.State("Ingest all uploaded PDFs and Word documents into the system."), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    btn_scratch.click(handle_chat, [gr.State("Check the local scratch folder and import any papers found there."), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    btn_analyze.click(handle_chat, [gr.State("run abstract only"), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    # After committing a review, reload the report tab from disk.
    sub_btn.click(submit_review, [tbl, chatbot, mode_btn, ph_state], [chatbot, ph_state, tbl]).then(_get_report, [], [report_box])

# Script entry point: start the Gradio server only when run directly.
if __name__ == "__main__":
    # Soft theme with indigo/slate hues and the Inter font stack
    theme = gr.themes.Soft(primary_hue="indigo", neutral_hue="slate", font=["Inter", "ui-sans-serif", "system-ui"])
    
    # Launch with default host/port settings, applying the theme and the custom CSS.
    # NOTE(review): theme/css are passed to launch() here; some Gradio releases
    # expect them on gr.Blocks() instead — confirm against the pinned version.
    demo.launch(
        theme=theme, 
        css=CSS
    )