Spaces:
Sleeping
Sleeping
| import os, json, shutil | |
| import pandas as pd | |
| import gradio as gr | |
| from agent import chat_with_agent | |
| from tools import CSV_PATH, OUT_DIR, PDF_DIR, _embed, HEADERS | |
# Call the embedding helper once at import time (presumably to warm the model
# cache -- confirm in tools._embed). A failure is only logged so the UI can
# still start.
try:
    _embed()
except Exception as exc:
    print(f">>> WARNING: Embedding pre-load failed: {exc}")
# Custom stylesheet passed to demo.launch(): dark palette, sidebar/main-panel
# chrome, the phase progress tracker (.phase-orb / .phase-dot) and form controls.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Plus+Jakarta+Sans:wght@600;700&display=swap');
:root { --primary: #f8fafc; --secondary: #818cf8; --accent: #a78bfa; --bg: #0b0f19; --panel: #111827; --border: rgba(255,255,255,0.1); }
body, .gradio-container { font-family: 'Inter', sans-serif !important; background-color: var(--bg) !important; color: var(--primary) !important; }
.hdr { font-family: 'Plus Jakarta Sans', sans-serif !important; font-size: 2.25rem !important; font-weight: 700; color: white !important; letter-spacing: -0.04em; margin-bottom: 2px !important; }
.sub-hdr { font-size: 0.875rem !important; color: #94a3b8 !important; margin-bottom: 24px !important; }
.sidebar { background: var(--panel) !important; border-right: 1px solid var(--border) !important; padding: 24px !important; }
.main-content { background: var(--panel) !important; border-radius: 12px !important; border: 1px solid var(--border) !important; box-shadow: 0 4px 20px rgba(0,0,0,0.4) !important; }
button.primary { background: var(--secondary) !important; color: white !important; border-radius: 8px !important; font-weight: 600 !important; border: none !important; transition: all 0.2s; }
button.primary:hover { filter: brightness(1.1); transform: translateY(-1px); }
.phase-orb { display: flex; align-items: center; gap: 8px; font-size: 0.75rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: #475569; margin-bottom: 12px; }
.phase-orb.active { color: var(--secondary); }
.phase-dot { width: 8px; height: 8px; border-radius: 50%; background: #334155; }
.phase-orb.active .phase-dot { background: var(--secondary); box-shadow: 0 0 10px var(--secondary); }
input, textarea, .gr-box, .gr-form, label, span, p, .markdown-text, h1, h2, h3 { color: #e2e8f0 !important; }
input, textarea { border-radius: 8px !important; border: 1px solid #334155 !important; background: #1e293b !important; }
input:focus { border-color: var(--secondary) !important; box-shadow: 0 0 0 2px rgba(129, 140, 248, 0.2) !important; }
.gr-button-secondary { background: #1e293b !important; color: #cbd5e1 !important; border: 1px solid #334155 !important; }
.gr-button-secondary:hover { background: #334155 !important; }
"""
# Static title/subtitle markup rendered at the top of the sidebar.
HDR = (
    '<div class="hdr">Digital Curator</div>'
    '<div class="sub-hdr">Enterprise Agentic Research Platform</div>'
)

# Progress-tracker template. Each {a1}..{a4} slot is filled with either
# "active" or "" (see get_phase_html) to highlight one of the four steps.
PRG_TPL = (
    '<div style="display: flex; flex-direction: column; gap: 4px;">'
    '<div class="phase-orb {a1}"><div class="phase-dot"></div>1. Ingestion</div>'
    '<div class="phase-orb {a2}"><div class="phase-dot"></div>2. Algorithmic Coding</div>'
    '<div class="phase-orb {a3}"><div class="phase-dot"></div>3. Thematic Abstraction</div>'
    '<div class="phase-orb {a4}"><div class="phase-dot"></div>4. Report Narration</div>'
    '</div>'
)
# File-name suffix (appended to the review mode, e.g. "abstract") that holds
# the reviewable data of each phase.
PHASE_FILE = {
    "CODES": "_labels.json",
    "THEMES": "_themes.json",
    "PAJAIS": "_taxonomy.json",
}

# Phase the review workflow moves to after each reviewable phase.
NEXT_PHASE = {
    "CODES": "THEMES",
    "THEMES": "PAJAIS",
    "PAJAIS": "REPORT",
}
def get_phase_html(current_phase):
    """Render the progress tracker with the step for *current_phase* highlighted.

    THEMES and PAJAIS share the third step; any phase name that is not
    recognised highlights the first step (ingestion).
    """
    step_for_phase = {
        "INGEST": "a1",
        "CODES": "a2",
        "THEMES": "a3",
        "PAJAIS": "a3",
        "REPORT": "a4",
    }
    highlighted = step_for_phase.get(current_phase, "a1")
    # Start with every slot blank, then switch on the single active one.
    slots = dict.fromkeys(("a1", "a2", "a3", "a4"), "")
    slots[highlighted] = "active"
    return PRG_TPL.format(**slots)
def init_analysis(query, csv_file, chat_hist):
    """Start a new research run from a topic, an uploaded CSV, or both.

    Validates the request, clears previous outputs, optionally seeds the
    paper CSV from the upload, asks the agent to ingest and cluster, and
    summarises the resulting dataset in the chat.

    Args:
        query: Research topic typed by the user (may be empty or None).
        csv_file: Uploaded CSV from the file component, or None.
        chat_hist: Current chat history as a list of role/content dicts.

    Returns:
        A 4-tuple matching the four outputs bound to the start button:
        (query-box text, chat history, review table, progress-tracker HTML).
        Every exit path returns all four values.
    """
    chat_hist = list(chat_hist or [])

    # 1. Query sanitization. Done before touching the disk so that a rejected
    #    request does not wipe the outputs of the previous run.
    query = (query or "").strip()
    if len(query) < 3 and not csv_file:
        return (
            "Please enter a valid research topic or upload a CSV.",
            chat_hist,
            pd.DataFrame(),
            get_phase_html("INGEST"),
        )

    # 2. Clear old outputs (the .gitkeep placeholder is preserved).
    if os.path.isdir(OUT_DIR):
        for name in os.listdir(OUT_DIR):
            fp = os.path.join(OUT_DIR, name)
            if os.path.isfile(fp) and name != ".gitkeep":
                os.remove(fp)

    # 3. Detection and logic
    mode = "Hybrid" if (query and csv_file) else ("CSV" if csv_file else "Query")
    sources = {}
    total_papers = 0

    if csv_file:
        # Handle CSV primary load
        try:
            # The upload may be a file-like object exposing .name or a plain path string.
            csv_path = getattr(csv_file, "name", csv_file)
            df = pd.read_csv(csv_path)
            # Keep only the known HEADERS columns; a missing column name raises
            # KeyError, which is reported to the user below.
            wanted = HEADERS[:len(df.columns)] if len(df.columns) < len(HEADERS) else HEADERS
            df = df[wanted].copy()
            df["Sr No"] = range(1, len(df) + 1)
            df["Source"] = "Uploaded CSV"
            df.to_csv(CSV_PATH, index=False)
            sources["CSV"] = len(df)
            total_papers = len(df)
        except Exception as e:
            return f"CSV Error: {e}", chat_hist, pd.DataFrame(), get_phase_html("INGEST")
    elif not os.path.exists(CSV_PATH):
        # Initialize empty CSV if query only (do not overwrite if it already exists)
        pd.DataFrame(columns=HEADERS + ["Source"]).to_csv(CSV_PATH, index=False)
    print(f"[MODE]: {mode}")

    # 4. Trigger the agent for Phase 1 (fetch/merge) and auto-advance by
    #    sending it an explicit ingestion command.
    trigger_msg = f"Start research on topic: '{query or 'AI and Societal Impact'}'. "
    if csv_file:
        trigger_msg += "I have uploaded a CSV dataset; merge new findings if needed. "
    trigger_msg += "Enforce MAX_TOTAL_PAPERS=120. Once papers are saved, proceed directly to Phase 2 (Clustering) using abstract mode."
    try:
        res = chat_with_agent(trigger_msg, "ux_session")
    except Exception as e:
        # Same reporting style as handle_chat: surface the failure in the chat
        # and stay on the ingestion step; the query is kept so it can be retried.
        failed_hist = chat_hist + [
            {"role": "user", "content": f"Init {mode}"},
            {"role": "assistant", "content": f"Error: {e}"},
        ]
        return query, failed_hist, pd.DataFrame(), get_phase_html("INGEST")

    # 5. Calculate final feedback
    if os.path.exists(CSV_PATH):
        df_final = pd.read_csv(CSV_PATH)
        total_papers = len(df_final)
        if "Source" in df_final.columns:
            # The "Source" column already accounts for the uploaded rows, so it
            # replaces the provisional "CSV" entry instead of being added to it.
            sources = df_final["Source"].value_counts().to_dict()
    print(f"[PAPERS]: {total_papers}")

    source_lines = "\n".join(f"- {s}: {c} papers" for s, c in sources.items())
    feedback = (
        f"Detected Mode: {mode}\nSources:\n{source_lines}\n"
        f"Total: {total_papers} papers\n\n👉 Preparing dataset... Proceeding to Phase 2"
    )
    new_hist = chat_hist + [
        {"role": "user", "content": f"Init {mode}"},
        {"role": "assistant", "content": f"{feedback}\n\n{res}"},
    ]
    return "", new_hist, _load_table("abstract", "CODES"), get_phase_html("CODES")
def handle_pdf_upload(files):
    """Copy uploaded PDF/DOCX files into PDF_DIR and report how many were saved.

    Args:
        files: Uploaded files from the multi-file component; each item is
            either an object exposing ``.name`` (its path) or a path string.
            May be None or empty.

    Returns:
        A status message for the sidebar.
    """
    if not files:
        return "No files selected."

    # The destination may not exist yet on a fresh deployment.
    os.makedirs(PDF_DIR, exist_ok=True)

    saved = []
    for f in files:
        src = getattr(f, "name", f)
        name = os.path.basename(src)
        # Files with any other extension are skipped silently.
        if os.path.splitext(name)[1].lower() in (".pdf", ".docx"):
            shutil.copy(src, os.path.join(PDF_DIR, name))
            saved.append(name)
    return f"✅ {len(saved)} file(s) saved. Click '📑 Ingest Uploads' below or ask the agent."
def _paper_titles_by_cluster(data: dict) -> dict:
    """Map each cluster key in *data* to the titles of its papers.

    Titles are looked up in the CSV at CSV_PATH by "Sr No". A paper id with
    no matching row is rendered as "#<id>".

    Args:
        data: Mapping of cluster key to a dict that may carry "paper_ids".

    Returns:
        ``{cluster_key: [title, ...]}``, or ``{}`` when the CSV does not exist.
    """
    if not os.path.exists(CSV_PATH):
        return {}
    papers = pd.read_csv(CSV_PATH)[["Sr No", "Title"]]

    # Rows with a blank or non-numeric serial number cannot be referenced by a
    # cluster; skip them instead of failing on int("") for one bad cell.
    serials = pd.to_numeric(papers["Sr No"], errors="coerce")
    usable = serials.notna()
    title_map = {
        str(int(serial)): title
        for serial, title in zip(serials[usable], papers.loc[usable, "Title"].fillna(""))
    }

    return {
        key: [title_map.get(str(pid), f"#{pid}") for pid in cluster.get("paper_ids", [])]
        for key, cluster in data.items()
    }
def _load_table(mode: str, phase: str) -> pd.DataFrame:
    """Load the review table for *phase* from the JSON written for *mode*.

    The file is ``OUT_DIR/<mode><suffix>`` where the suffix comes from
    PHASE_FILE (falling back to ``_<phase>.json``).

    Returns:
        - CODES: one row per cluster with label, confidence, reasoning, paper
          titles and the editable "Approve" / "Rename To" columns.
        - THEMES: the "themes" list as a frame.
        - any other phase: Theme / Category / Confidence rows.
        An empty frame is returned when the file is missing or cannot be
        read; the UI treats that as "nothing to show yet".
    """
    try:
        path = os.path.join(OUT_DIR, mode + PHASE_FILE.get(phase, f"_{phase.lower()}.json"))
        if not os.path.exists(path):
            return pd.DataFrame()
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)

        if phase == "CODES":
            cluster_papers = _paper_titles_by_cluster(data)
            rows = []
            for key, cluster in data.items():
                titles = cluster_papers.get(key, [])
                rows.append({
                    "ID": key,
                    "Label": cluster.get("label", ""),
                    "Confidence": cluster.get("confidence", ""),
                    "Reasoning": cluster.get("reasoning", ""),
                    "Paper Count": len(titles),
                    "Paper Titles": " | ".join(titles),
                    "Approve": "yes",
                    "Rename To": "",
                })
            return pd.DataFrame(rows)

        if phase == "THEMES":
            return pd.DataFrame(data.get("themes", []))

        items = data if isinstance(data, list) else list(data.values())
        return pd.DataFrame([
            {
                "Theme": item.get("name", ""),
                "Category": item.get("pajais_category", ""),
                "Confidence": item.get("confidence", ""),
            }
            for item in items
        ])
    except Exception as e:
        # Best-effort loader: never break the UI, but leave a trace so a
        # malformed file is not silently indistinguishable from a missing one.
        print(f">>> WARNING: Could not load {phase} table for mode '{mode}': {e}")
        return pd.DataFrame()
def handle_chat(msg, hist, mode, ph):
    """Send *msg* to the agent and refresh the review table.

    Args:
        msg: Message to send.
        hist: Chat history (list of role/content dicts); may be None.
        mode: Review mode used to pick the table file.
        ph: Phase whose table should be reloaded.

    Returns:
        ``("", new_history, table)``. The empty string clears the message
        box. On agent failure the error is appended to the history and the
        table is left unchanged via ``gr.update()``.
    """
    # Work on a copy so the caller's list is never mutated in place.
    hist = list(hist or [])
    user_turn = {"role": "user", "content": msg}
    try:
        res = chat_with_agent(msg, "ux_session")
    except Exception as e:
        # Keep the user's message in the transcript; the input box is cleared
        # either way, so dropping it would make the message vanish.
        return "", hist + [user_turn, {"role": "assistant", "content": f"Error: {e}"}], gr.update()
    return "", hist + [user_turn, {"role": "assistant", "content": res}], _load_table(mode, ph)
def submit_review(df, hist, mode, ph):
    """Send the user's review of phase *ph* to the agent and advance.

    For the CODES phase, rows whose "Approve" is not "yes" and that carry a
    non-blank "Rename To" are forwarded to the agent as label renames.

    Args:
        df: The reviewed table (may be None or empty).
        hist: Chat history.
        mode: Review mode used to pick the table file.
        ph: Phase that was just reviewed.

    Returns:
        ``(new_history, next_phase, table_for_next_phase)``.
    """
    nxt = NEXT_PHASE.get(ph, ph)
    msg = f"User reviewed {ph}. Proceed to Phase {nxt}."

    has_review_columns = (
        df is not None and not df.empty and {"Approve", "Rename To"} <= set(df.columns)
    )
    if ph == "CODES" and has_review_columns:
        # Blank cells may arrive as NaN/None; normalise them first so they do
        # not stringify to "nan"/"None" and get sent as a rename.
        approve = df["Approve"].fillna("").astype(str).str.strip().str.lower()
        rename_to = df["Rename To"].fillna("").astype(str).str.strip()
        rejected = (approve != "yes") & (rename_to != "")
        if rejected.any():
            changes = "; ".join(
                f'Cluster {cluster_id}: rename label to "{new_label}"'
                for cluster_id, new_label in zip(df.loc[rejected, "ID"], rename_to[rejected])
            )
            msg = (f"User reviewed {ph} and rejected some labels. Apply these changes: {changes}. Save and proceed to {nxt}.")

    _, new_hist, _ = handle_chat(msg, hist, mode, ph)
    return new_hist, nxt, _load_table(mode, nxt)
def _get_files():
    """Return the paths of the downloadable files in OUT_DIR, sorted by name.

    Returns an empty list when OUT_DIR does not exist. Sub-directories are
    skipped, as is the ``.gitkeep`` placeholder that init_analysis also
    preserves when clearing outputs.
    """
    if not os.path.isdir(OUT_DIR):
        return []
    candidates = (
        os.path.join(OUT_DIR, name)
        for name in sorted(os.listdir(OUT_DIR))
        if name != ".gitkeep"
    )
    return [path for path in candidates if os.path.isfile(path)]
def _get_report():
    """Return the narrative report as Markdown, or a placeholder if none exists.

    Looks for ``narrative.txt`` first, then ``consolidated_report.txt``, in
    OUT_DIR and returns the first one found.
    """
    for name in ("narrative.txt", "consolidated_report.txt"):
        path = os.path.join(OUT_DIR, name)
        if os.path.exists(path):
            # Context manager so the handle is closed deterministically.
            with open(path, encoding="utf-8") as fh:
                return f"### 📄 Latest Narrative Report\n\n{fh.read()}"
    return "### 📄 Narrative Report\n(Not generated yet)"
def _show_chart(m, t):
    """Return an iframe embedding the saved chart ``OUT_DIR/<m>_<t>.html``.

    Args:
        m: Mode part of the chart file name.
        t: Chart-type part of the chart file name.

    Returns:
        An ``<iframe srcdoc=...>`` string, or a hint when the chart file
        does not exist.
    """
    import html  # local import: only this helper needs it

    path = os.path.join(OUT_DIR, f"{m}_{t}.html")
    if not os.path.exists(path):
        return "Run Phase 2 first."
    with open(path, encoding="utf-8") as fh:
        document = fh.read()
    # The whole document lives inside an HTML attribute, so "&" must be
    # escaped as well as quotes; otherwise entities already present in the
    # chart markup are decoded one level too far when the attribute is parsed.
    return f'<iframe srcdoc="{html.escape(document, quote=True)}" width="100%" height="450"></iframe>'
def _load_curation() -> pd.DataFrame:
    """Return the classification-review columns of the paper CSV.

    Columns absent from the CSV are added as empty strings. An empty frame
    is returned when the CSV at CSV_PATH does not exist.
    """
    if not os.path.exists(CSV_PATH):
        return pd.DataFrame()

    wanted = [
        "Sr No",
        "Title",
        "Web Link",
        "Research Type",
        "Research Type Confidence",
        "Research Type Reason",
        "Findings",
    ]
    papers = pd.read_csv(CSV_PATH)
    absent = [col for col in wanted if col not in papers.columns]
    for col in absent:
        papers[col] = ""
    return papers[wanted]
def _save_curation(updated_df: pd.DataFrame):
    """Write the user's classification edits back to the paper CSV.

    Rows are matched on "Sr No". Only the classification columns
    ("Research Type", "Research Type Confidence", "Research Type Reason",
    "Findings") are copied; every other column of the CSV is left as is.

    Args:
        updated_df: The edited curation table (may be None or empty).

    Returns:
        The result of ``gr.Info`` on success or ``gr.Warning`` when there is
        nothing to save.
    """
    if updated_df is None or updated_df.empty or not os.path.exists(CSV_PATH):
        return gr.Warning("No data to save.")
    full_df = pd.read_csv(CSV_PATH)
    if "Sr No" not in full_df.columns or "Sr No" not in updated_df.columns:
        return gr.Warning("No data to save.")

    editable = ("Research Type", "Research Type Confidence", "Research Type Reason", "Findings")
    # Edited cells may hold text (e.g. "") even where the CSV column was parsed
    # as numeric; use object dtype so the assignment is not a dtype mismatch.
    for col in editable:
        if col in full_df.columns:
            full_df[col] = full_df[col].astype(object)

    # Compare serial numbers numerically: the grid may hand them back as
    # strings or floats while the CSV holds ints, and a raw == would then
    # match nothing while still reporting success.
    stored_keys = pd.to_numeric(full_df["Sr No"], errors="coerce")
    first_row = {}
    for idx, key in zip(full_df.index, stored_keys):
        if pd.notna(key):
            first_row.setdefault(key, idx)  # first occurrence wins

    edited_keys = pd.to_numeric(updated_df["Sr No"], errors="coerce")
    for key, (_, row) in zip(edited_keys, updated_df.iterrows()):
        idx = first_row.get(key)
        if idx is None:
            continue
        for col in editable:
            full_df.loc[idx, col] = row.get(col, "")

    full_df.to_csv(CSV_PATH, index=False)
    return gr.Info("✅ Classification Overrides Saved!")
# Application layout: a sidebar (query, uploads, quick actions) next to a
# tabbed workspace (chat, curation, review table, charts, report, exports).
# NOTE(review): the nesting below was reconstructed from a flattened paste;
# confirm container membership (e.g. what sits inside the Accordion) against
# the deployed file.
with gr.Blocks() as demo:
    with gr.Row():
        # LEFT SIDEBAR
        with gr.Column(scale=1, elem_classes="sidebar"):
            gr.HTML(HDR)
            prg_display = gr.HTML(get_phase_html("INGEST"))
            gr.HTML('<div style="height: 24px; border-top: 1px solid #f1f5f9; margin-top: 24px;"></div>')
            input_box = gr.Textbox(placeholder="Enter research topic...", label="Research Query")
            start_btn = gr.Button("Execute Discovery", variant="primary")
            gr.HTML('<div style="height: 32px"></div>')
            with gr.Accordion("Artifact Uploads", open=False):
                csv_up = gr.File(label="Upload CSV Structure", file_types=[".csv"])
                pdf_up = gr.File(label="Upload Raw Papers (PDF/Docx)", file_types=[".pdf", ".docx"], file_count="multiple")
                pdf_status = gr.Markdown()
                # Uploaded papers are copied to disk immediately; ingestion is a separate step.
                pdf_up.upload(handle_pdf_upload, [pdf_up], [pdf_status])
            gr.HTML('<div style="height: 32px"></div>')
            gr.HTML('<div class="sub-hdr">Quick Actions</div>')
            btn_ingest = gr.Button("📑 Ingest Uploads", variant="secondary")
            btn_scratch = gr.Button("📂 Import Scratch", variant="secondary")
            btn_analyze = gr.Button("▶ Run Analysis", variant="secondary")
        # RIGHT MAIN WORKSPACE
        with gr.Column(scale=3, elem_classes="main-content"):
            with gr.Tabs(elem_classes="tab-nav"):
                with gr.Tab("💬 Agent Copilot"):
                    chatbot = gr.Chatbot([], height=500, label="Research Assistant")
                    with gr.Row():
                        msg_in = gr.Textbox(placeholder="Directly ask the agent to search, review, or summarize...", show_label=False, scale=5)
                        send_btn = gr.Button("Send", variant="primary", scale=1)
                with gr.Tab("🗂️ Data Curation"):
                    curation_df = gr.Dataframe(interactive=True, label="Research Classification Review")
                    gr.Markdown("<br>**💡 Editorial Note:** Review the Agent's reasoning. Modify 'Research Type' if you disagree, then save.")
                    with gr.Row():
                        refresh_cur_btn = gr.Button("🔄 Load Ingestions", variant="secondary")
                        save_cur_btn = gr.Button("💾 Save Overrides", variant="primary")
                    refresh_cur_btn.click(_load_curation, [], [curation_df])
                    save_cur_btn.click(_save_curation, [curation_df], [])
                with gr.Tab("📋 Research Nodes"):
                    # Phase currently under review; advanced by submit_review.
                    ph_state = gr.State("CODES")
                    mode_btn = gr.Radio(["abstract","title"], label="Review Mode", value="abstract")
                    tbl = gr.Dataframe(interactive=True)
                    gr.Markdown("<br>**💡 Editorial Note:** Reject a label by setting **Approve** = 'no' and filling **Rename To**.")
                    with gr.Row():
                        refresh_tbl_btn = gr.Button("🔄 Sync Data", variant="secondary")
                        sub_btn = gr.Button("Commit & Advance", variant="primary")
                    refresh_tbl_btn.click(_load_table, [mode_btn, ph_state], [tbl])
                with gr.Tab("📊 Abstraction Vectors"):
                    with gr.Row():
                        cm = gr.Dropdown(["abstract","title"], value="abstract", label="Mode", scale=1)
                        ct = gr.Dropdown(["intertopic","heatmap","dendrogram"], value="intertopic", label="Vector Chart Type", scale=2)
                    chart_out = gr.HTML()
                    # Changing either dropdown re-renders the chart from the saved HTML file.
                    cm.change(_show_chart, [cm, ct], [chart_out]); ct.change(_show_chart, [cm, ct], [chart_out])
                with gr.Tab("📄 Synthesis Report"):
                    btn_report = gr.Button("Generate Narrative", variant="primary")
                    report_box = gr.Markdown("### 📄 Narrative Report\n(Waiting for Phase 6 completion)")
                    # The button only displays an existing report file; it does not invoke the agent.
                    btn_report.click(_get_report, [], [report_box])
                with gr.Tab("📥 Archival Exports"):
                    dl = gr.File(label="Artifacts", file_count="multiple", interactive=False)
                    btn_refresh_files = gr.Button("🔄 Sync Archive", variant="secondary")
                    btn_refresh_files.click(_get_files, [], [dl])
    # Event Bindings
    # init_analysis feeds four outputs: query box, chat, review table, progress tracker.
    start_btn.click(init_analysis, [input_box, csv_up, chatbot], [input_box, chatbot, tbl, prg_display])
    send_btn.click(handle_chat, [msg_in, chatbot, mode_btn, ph_state], [msg_in, chatbot, tbl])
    # Quick actions reuse handle_chat with a canned prompt and fixed mode/phase values.
    btn_ingest.click(handle_chat, [gr.State("Ingest all uploaded PDFs and Word documents into the system."), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    btn_scratch.click(handle_chat, [gr.State("Check the local scratch folder and import any papers found there."), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    btn_analyze.click(handle_chat, [gr.State("run abstract only"), chatbot, gr.State("abstract"), gr.State("CODES")], [msg_in, chatbot, tbl])
    # After a review is committed, the report panel is refreshed from disk.
    sub_btn.click(submit_review, [tbl, chatbot, mode_btn, ph_state], [chatbot, ph_state, tbl]).then(_get_report, [], [report_box])
if __name__ == "__main__":
    # Standard HF Space deployment settings: Soft theme (indigo/slate, Inter
    # font stack) plus the custom stylesheet, both supplied at launch time.
    hf_theme = gr.themes.Soft(
        primary_hue="indigo",
        neutral_hue="slate",
        font=["Inter", "ui-sans-serif", "system-ui"],
    )
    launch_options = {"theme": hf_theme, "css": CSS}
    demo.launch(**launch_options)