import json
import os
import shutil

import gradio as gr
import pandas as pd

from agent import chat_with_agent
from tools import CSV_PATH, HEADERS, OUT_DIR, PDF_DIR, _embed

# Best-effort pre-load of the embedding backend: a failure is reported on
# stdout but must not stop the UI from starting.
try:
    _embed()
except Exception as e:
    print(f">>> WARNING: Embedding pre-load failed: {e}")

# Stylesheet handed to ``demo.launch(css=...)``; one rule per line.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Plus+Jakarta+Sans:wght@600;700&display=swap');
:root { --primary: #f8fafc; --secondary: #818cf8; --accent: #a78bfa; --bg: #0b0f19; --panel: #111827; --border: rgba(255,255,255,0.1); }
body, .gradio-container { font-family: 'Inter', sans-serif !important; background-color: var(--bg) !important; color: var(--primary) !important; }
.hdr { font-family: 'Plus Jakarta Sans', sans-serif !important; font-size: 2.25rem !important; font-weight: 700; color: white !important; letter-spacing: -0.04em; margin-bottom: 2px !important; }
.sub-hdr { font-size: 0.875rem !important; color: #94a3b8 !important; margin-bottom: 24px !important; }
.sidebar { background: var(--panel) !important; border-right: 1px solid var(--border) !important; padding: 24px !important; }
.main-content { background: var(--panel) !important; border-radius: 12px !important; border: 1px solid var(--border) !important; box-shadow: 0 4px 20px rgba(0,0,0,0.4) !important; }
button.primary { background: var(--secondary) !important; color: white !important; border-radius: 8px !important; font-weight: 600 !important; border: none !important; transition: all 0.2s; }
button.primary:hover { filter: brightness(1.1); transform: translateY(-1px); }
.phase-orb { display: flex; align-items: center; gap: 8px; font-size: 0.75rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: #475569; margin-bottom: 12px; }
.phase-orb.active { color: var(--secondary); }
.phase-dot { width: 8px; height: 8px; border-radius: 50%; background: #334155; }
.phase-orb.active .phase-dot { background: var(--secondary); box-shadow: 0 0 10px var(--secondary); }
input, textarea, .gr-box, .gr-form, label, span, p, .markdown-text, h1, h2, h3 { color: #e2e8f0 !important; }
input, textarea { border-radius: 8px !important; border: 1px solid #334155 !important; background: #1e293b !important; }
input:focus { border-color: var(--secondary) !important; box-shadow: 0 0 0 2px rgba(129, 140, 248, 0.2) !important; }
.gr-button-secondary { background: #1e293b !important; color: #cbd5e1 !important; border: 1px solid #334155 !important; }
.gr-button-secondary:hover { background: #334155 !important; }
"""

# Sidebar title block.
# NOTE(review): the wrapping markup was not visible in the reviewed text; it
# is rebuilt from the `.hdr` / `.sub-hdr` rules in CSS -- confirm the design.
HDR = (
    '<div class="hdr">Digital Curator</div>'
    '<div class="sub-hdr">Enterprise Agentic Research Platform</div>'
)

# Phase tracker template. get_phase_html() fills {a1}..{a4} with "active" or
# "" so that exactly one step picks up the `.phase-orb.active` styling.
# NOTE(review): markup rebuilt from the `.phase-orb` / `.phase-dot` CSS rules
# and the a1..a4 keys used by get_phase_html() -- confirm the design.
PRG_TPL = (
    '<div>'
    '<div class="phase-orb {a1}"><div class="phase-dot"></div>1. Ingestion</div>'
    '<div class="phase-orb {a2}"><div class="phase-dot"></div>2. Algorithmic Coding</div>'
    '<div class="phase-orb {a3}"><div class="phase-dot"></div>3. Thematic Abstraction</div>'
    '<div class="phase-orb {a4}"><div class="phase-dot"></div>4. Report Narration</div>'
    '</div>'
)

# File-name suffix (appended to the review mode) of each phase's JSON output.
PHASE_FILE = {"CODES": "_labels.json", "THEMES": "_themes.json", "PAJAIS": "_taxonomy.json"}
# Phase that follows each reviewable phase.
NEXT_PHASE = {"CODES": "THEMES", "THEMES": "PAJAIS", "PAJAIS": "REPORT"}


def get_phase_html(current_phase):
    """Render the phase tracker with the step for ``current_phase`` highlighted.

    THEMES and PAJAIS share the third step; an unknown phase highlights the
    first step.
    """
    mapping = {"INGEST": "a1", "CODES": "a2", "THEMES": "a3", "PAJAIS": "a3", "REPORT": "a4"}
    active = mapping.get(current_phase, "a1")
    return PRG_TPL.format(**{k: "active" if k == active else "" for k in ("a1", "a2", "a3", "a4")})


def _upload_path(upload):
    """Return the filesystem path of an upload (a path string or an object with ``.name``)."""
    return getattr(upload, "name", upload)


def _clear_outputs():
    """Delete every regular file in OUT_DIR except the ``.gitkeep`` placeholder."""
    if not os.path.isdir(OUT_DIR):
        return
    for name in os.listdir(OUT_DIR):
        path = os.path.join(OUT_DIR, name)
        if name != ".gitkeep" and os.path.isfile(path):
            os.remove(path)


def _stage_uploaded_csv(csv_file):
    """Copy the uploaded CSV to CSV_PATH, renumbered and tagged; return its row count.

    Columns are selected by HEADERS name; a CSV with fewer columns than
    HEADERS is expected to carry the leading HEADERS columns. A missing
    column raises ``KeyError``, which the caller reports as a CSV error.
    """
    df = pd.read_csv(_upload_path(csv_file))
    wanted = HEADERS[:len(df.columns)] if len(df.columns) < len(HEADERS) else HEADERS
    df = df[wanted].copy()
    df["Sr No"] = range(1, len(df) + 1)
    df["Source"] = "Uploaded CSV"
    df.to_csv(CSV_PATH, index=False)
    return len(df)


def init_analysis(query, csv_file, chat_hist):
    """Start a new research run from a topic and/or an uploaded CSV.

    Args:
        query: Research topic typed by the user (may be empty or None).
        csv_file: Uploaded CSV, or None.
        chat_hist: Current chat history (list of role/content dicts) or None.

    Returns:
        A 4-tuple matching the click binding's outputs: text for the query
        box, the updated chat history, the table for the CODES phase, and the
        phase-tracker HTML. On validation, CSV, or agent failure the tracker
        stays on the ingestion step and the table is empty.
    """
    history = list(chat_hist or [])

    # Validate before touching the disk so a bad request cannot wipe the
    # outputs of the previous run.
    query = (query or "").strip()
    if len(query) < 3 and not csv_file:
        return ("Please enter a valid research topic or upload a CSV.",
                history, pd.DataFrame(), get_phase_html("INGEST"))

    _clear_outputs()

    mode = "Hybrid" if (query and csv_file) else ("CSV" if csv_file else "Query")
    sources = {}
    total_papers = 0

    if csv_file:
        try:
            total_papers = _stage_uploaded_csv(csv_file)
        except Exception as e:
            return f"CSV Error: {e}", history, pd.DataFrame(), get_phase_html("INGEST")
        sources["CSV"] = total_papers
    elif not os.path.exists(CSV_PATH):
        # Query-only run: create an empty dataset, never overwrite an existing one.
        pd.DataFrame(columns=HEADERS + ["Source"]).to_csv(CSV_PATH, index=False)

    print(f"[MODE]: {mode}")

    # Ask the agent to fetch/merge papers and continue straight into Phase 2.
    trigger_msg = f"Start research on topic: '{query or 'AI and Societal Impact'}'. "
    if csv_file:
        trigger_msg += "I have uploaded a CSV dataset; merge new findings if needed. "
    trigger_msg += ("Enforce MAX_TOTAL_PAPERS=120. Once papers are saved, proceed directly "
                    "to Phase 2 (Clustering) using abstract mode.")
    try:
        res = chat_with_agent(trigger_msg, "ux_session")
    except Exception as e:
        # Same reporting style as handle_chat(): show the error in the chat
        # and keep the query so the user can retry.
        failed = history + [{"role": "user", "content": f"Init {mode}"},
                            {"role": "assistant", "content": f"Error: {e}"}]
        return query, failed, pd.DataFrame(), get_phase_html("INGEST")

    if os.path.exists(CSV_PATH):
        df_final = pd.read_csv(CSV_PATH)
        total_papers = len(df_final)
        if "Source" in df_final.columns:
            # The Source column already covers the uploaded rows, so it
            # replaces (rather than extends) the provisional "CSV" entry.
            sources = df_final["Source"].value_counts().to_dict()

    print(f"[PAPERS]: {total_papers}")
    source_lines = "\n".join(f"- {s}: {c} papers" for s, c in sources.items())
    feedback = (f"Detected Mode: {mode}\nSources:\n{source_lines}\nTotal: {total_papers} papers"
                f"\n\n👉 Preparing dataset... Proceeding to Phase 2")

    new_hist = history + [{"role": "user", "content": f"Init {mode}"},
                          {"role": "assistant", "content": feedback + "\n\n" + res}]
    return "", new_hist, _load_table("abstract", "CODES"), get_phase_html("CODES")


def handle_pdf_upload(files):
    """Copy uploaded ``.pdf`` / ``.docx`` files into PDF_DIR and return a status line."""
    if not files:
        return "No files selected."
    os.makedirs(PDF_DIR, exist_ok=True)
    saved = []
    for f in files:
        src = _upload_path(f)
        name = os.path.basename(src)
        if os.path.splitext(name)[1].lower() in (".pdf", ".docx"):
            shutil.copy(src, os.path.join(PDF_DIR, name))
            saved.append(name)
    return f"✅ {len(saved)} file(s) saved. Click '📑 Ingest Uploads' below or ask the agent."
def _paper_titles_by_cluster(data: dict) -> dict:
    """Map each cluster key in ``data`` to the titles of its ``paper_ids``.

    Titles are looked up by "Sr No" in the CSV at CSV_PATH; an id with no
    matching row is shown as ``#<id>``. Returns ``{}`` when the CSV is
    missing or lacks the "Sr No" / "Title" columns.
    """
    if not os.path.exists(CSV_PATH):
        return {}
    papers = pd.read_csv(CSV_PATH)
    if "Sr No" not in papers.columns or "Title" not in papers.columns:
        return {}
    # Rows with a blank or non-numeric "Sr No" are skipped rather than
    # aborting the lookup for every cluster.
    ids = pd.to_numeric(papers["Sr No"], errors="coerce")
    titles = papers["Title"].fillna("").astype(str)
    known = ids.notna()
    title_map = {str(int(i)): t for i, t in zip(ids[known], titles[known])}
    return {
        key: [title_map.get(str(pid), f"#{pid}") for pid in cluster.get("paper_ids", [])]
        for key, cluster in data.items()
    }


def _load_table(mode: str, phase: str) -> pd.DataFrame:
    """Load the review table for ``phase`` from ``OUT_DIR/<mode><suffix>``.

    The suffix comes from PHASE_FILE, falling back to ``_<phase>.json``.
    Best-effort: a missing or unreadable file yields an empty DataFrame so
    the UI keeps working; the failure is logged to stdout.
    """
    try:
        path = os.path.join(OUT_DIR, mode + PHASE_FILE.get(phase, f"_{phase.lower()}.json"))
        if not os.path.exists(path):
            return pd.DataFrame()
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)

        if phase == "CODES":
            cluster_papers = _paper_titles_by_cluster(data)
            rows = []
            for key, cluster in data.items():
                titles = cluster_papers.get(key, [])
                rows.append({
                    "ID": key,
                    "Label": cluster.get("label", ""),
                    "Confidence": cluster.get("confidence", ""),
                    "Reasoning": cluster.get("reasoning", ""),
                    "Paper Count": len(titles),
                    "Paper Titles": " | ".join(titles),
                    "Approve": "yes",
                    "Rename To": "",
                })
            return pd.DataFrame(rows)

        if phase == "THEMES":
            themes = data.get("themes", []) if isinstance(data, dict) else data
            return pd.DataFrame(themes)

        items = data if isinstance(data, list) else list(data.values())
        return pd.DataFrame([
            {"Theme": v.get("name", ""),
             "Category": v.get("pajais_category", ""),
             "Confidence": v.get("confidence", "")}
            for v in items
        ])
    except Exception as exc:
        # Deliberately broad (UI boundary) but no longer a bare ``except``,
        # so KeyboardInterrupt / SystemExit still propagate.
        print(f">>> WARNING: could not load {mode}/{phase} table: {exc}")
        return pd.DataFrame()


def handle_chat(msg, hist, mode, ph):
    """Send ``msg`` to the agent and return ``(textbox value, history, table)``.

    The incoming history is copied, never mutated. On an agent error the
    error text is appended as the assistant reply and the table is left
    unchanged via ``gr.update()``.
    """
    history = list(hist or [])
    history.append({"role": "user", "content": msg})
    try:
        res = chat_with_agent(msg, "ux_session")
    except Exception as e:
        history.append({"role": "assistant", "content": f"Error: {e}"})
        return "", history, gr.update()
    history.append({"role": "assistant", "content": res})
    return "", history, _load_table(mode, ph)


def submit_review(df, hist, mode, ph):
    """Report the user's review of phase ``ph`` to the agent and advance.

    In the CODES phase, rows whose "Approve" is not "yes" and whose
    "Rename To" is filled are sent as rename instructions.

    Returns:
        ``(history, phase, table)``. The phase only moves to its successor
        when the agent call succeeded; otherwise it stays at ``ph`` and the
        table is left unchanged.
    """
    nxt = NEXT_PHASE.get(ph, ph)
    msg = f"User reviewed {ph}. Proceed to Phase {nxt}."

    needed = {"ID", "Approve", "Rename To"}
    if ph == "CODES" and df is not None and not df.empty and needed <= set(df.columns):
        # fillna first: a blank cell that arrives as NaN must not become the
        # literal label "nan".
        approve = df["Approve"].fillna("").astype(str).str.strip().str.lower()
        new_label = df["Rename To"].fillna("").astype(str).str.strip()
        rejected = (approve != "yes") & (new_label != "")
        if rejected.any():
            changes = "; ".join(
                f'Cluster {cid}: rename label to "{label}"'
                for cid, label in zip(df.loc[rejected, "ID"], new_label[rejected])
            )
            msg = (f"User reviewed {ph} and rejected some labels. "
                   f"Apply these changes: {changes}. Save and proceed to {nxt}.")

    _, new_hist, table = handle_chat(msg, hist, mode, ph)
    if not isinstance(table, pd.DataFrame):
        # handle_chat() signals an agent failure with a no-op table update.
        return new_hist, ph, table
    return new_hist, nxt, _load_table(mode, nxt)


def _get_files():
    """List the downloadable artifacts in OUT_DIR.

    Returns sorted paths of regular files only; sub-directories and the
    ``.gitkeep`` placeholder are skipped. A missing OUT_DIR yields ``[]``.
    """
    if not os.path.isdir(OUT_DIR):
        return []
    paths = (os.path.join(OUT_DIR, name) for name in sorted(os.listdir(OUT_DIR)) if name != ".gitkeep")
    return [p for p in paths if os.path.isfile(p)]


def _get_report():
    """Return the narrative report as Markdown, or a placeholder if none exists.

    ``narrative.txt`` takes precedence over ``consolidated_report.txt``.
    """
    for name in ("narrative.txt", "consolidated_report.txt"):
        path = os.path.join(OUT_DIR, name)
        if os.path.exists(path):
            with open(path, encoding="utf-8") as fh:
                return f"### 📄 Latest Narrative Report\n\n{fh.read()}"
    return "### 📄 Narrative Report\n(Not generated yet)"


def _show_chart(m, t):
    """Return HTML embedding the chart ``OUT_DIR/<m>_<t>.html``.

    Returns the hint "Run Phase 2 first." when the chart file does not exist.
    """
    import html  # local import: keeps this function self-contained

    path = os.path.join(OUT_DIR, f"{m}_{t}.html")
    if not os.path.exists(path):
        return "Run Phase 2 first."
    with open(path, encoding="utf-8") as fh:
        document = html.escape(fh.read(), quote=True)
    # NOTE(review): the original embed markup was not visible in the reviewed
    # text. The chart is inlined through `srcdoc`, which needs no
    # file-serving route; confirm sizing against the intended layout.
    return f'<iframe srcdoc="{document}" style="width: 100%; height: 640px; border: none;"></iframe>'
# Columns shown in the "Data Curation" tab, in display order.
_CURATION_COLUMNS = ["Sr No", "Title", "Web Link", "Research Type",
                     "Research Type Confidence", "Research Type Reason", "Findings"]
# Subset of the curation columns that _save_curation() writes back.
_EDITABLE_COLUMNS = ["Research Type", "Research Type Confidence",
                     "Research Type Reason", "Findings"]

# NOTE(review): the markup of these two layout snippets was not visible in
# the reviewed text; they are neutral placeholders -- confirm the design.
_SPACER_HTML = '<div style="height: 16px;"></div>'
_QUICK_ACTIONS_HTML = '<div style="font-weight: 600; margin-bottom: 8px;">Quick Actions</div>'


def _load_curation() -> pd.DataFrame:
    """Return the curation view of the CSV at CSV_PATH.

    Missing curation columns are added as empty strings. Returns an empty
    DataFrame when the CSV does not exist.
    """
    if not os.path.exists(CSV_PATH):
        return pd.DataFrame()
    df = pd.read_csv(CSV_PATH)
    for col in _CURATION_COLUMNS:
        if col not in df.columns:
            df[col] = ""
    return df[_CURATION_COLUMNS]


def _save_curation(updated_df: pd.DataFrame):
    """Write the edited classification columns back into the CSV at CSV_PATH.

    Rows are matched on "Sr No", compared numerically so that 1, 1.0 and "1"
    are the same paper; when a number occurs several times in the CSV only
    its first row is updated. Shows a toast through ``gr.Info`` /
    ``gr.Warning`` and returns that call's result.
    """
    if (updated_df is None or updated_df.empty
            or "Sr No" not in updated_df.columns
            or not os.path.exists(CSV_PATH)):
        return gr.Warning("No data to save.")

    full_df = pd.read_csv(CSV_PATH)
    if "Sr No" not in full_df.columns:
        return gr.Warning("No data to save.")

    for col in _EDITABLE_COLUMNS:
        if col not in full_df.columns:
            full_df[col] = ""
        # A column read as numeric (e.g. all blank -> float) cannot hold the
        # edited text without an upcast, so make it object first.
        full_df[col] = full_df[col].astype(object)

    # First row index for each numeric "Sr No".
    first_row = {}
    for idx, sr_no in zip(full_df.index, pd.to_numeric(full_df["Sr No"], errors="coerce")):
        if pd.notna(sr_no):
            first_row.setdefault(sr_no, idx)

    edited_ids = pd.to_numeric(updated_df["Sr No"], errors="coerce")
    for (_, row), sr_no in zip(updated_df.iterrows(), edited_ids):
        target = first_row.get(sr_no) if pd.notna(sr_no) else None
        if target is None:
            continue
        for col in _EDITABLE_COLUMNS:
            full_df.at[target, col] = row.get(col, "")

    full_df.to_csv(CSV_PATH, index=False)
    return gr.Info("✅ Classification Overrides Saved!")


with gr.Blocks() as demo:
    with gr.Row():
        # LEFT SIDEBAR
        with gr.Column(scale=1, elem_classes="sidebar"):
            gr.HTML(HDR)
            prg_display = gr.HTML(get_phase_html("INGEST"))
            gr.HTML(_SPACER_HTML)
            input_box = gr.Textbox(placeholder="Enter research topic...", label="Research Query")
            start_btn = gr.Button("Execute Discovery", variant="primary")
            gr.HTML(_SPACER_HTML)
            with gr.Accordion("Artifact Uploads", open=False):
                csv_up = gr.File(label="Upload CSV Structure", file_types=[".csv"])
                pdf_up = gr.File(label="Upload Raw Papers (PDF/Docx)",
                                 file_types=[".pdf", ".docx"], file_count="multiple")
                pdf_status = gr.Markdown()
                pdf_up.upload(handle_pdf_upload, [pdf_up], [pdf_status])
            gr.HTML(_SPACER_HTML)
            gr.HTML(_QUICK_ACTIONS_HTML)
            btn_ingest = gr.Button("📑 Ingest Uploads", variant="secondary")
            btn_scratch = gr.Button("📂 Import Scratch", variant="secondary")
            btn_analyze = gr.Button("▶ Run Analysis", variant="secondary")

        # RIGHT MAIN WORKSPACE
        with gr.Column(scale=3, elem_classes="main-content"):
            with gr.Tabs(elem_classes="tab-nav"):
                with gr.Tab("💬 Agent Copilot"):
                    chatbot = gr.Chatbot([], height=500, label="Research Assistant")
                    with gr.Row():
                        msg_in = gr.Textbox(
                            placeholder="Directly ask the agent to search, review, or summarize...",
                            show_label=False, scale=5)
                        send_btn = gr.Button("Send", variant="primary", scale=1)

                with gr.Tab("🗂️ Data Curation"):
                    curation_df = gr.Dataframe(interactive=True, label="Research Classification Review")
                    gr.Markdown("**💡 Editorial Note:** Review the Agent's reasoning. "
                                "Modify 'Research Type' if you disagree, then save.")
                    with gr.Row():
                        refresh_cur_btn = gr.Button("🔄 Load Ingestions", variant="secondary")
                        save_cur_btn = gr.Button("💾 Save Overrides", variant="primary")
                    refresh_cur_btn.click(_load_curation, [], [curation_df])
                    save_cur_btn.click(_save_curation, [curation_df], [])

                with gr.Tab("📋 Research Nodes"):
                    ph_state = gr.State("CODES")
                    mode_btn = gr.Radio(["abstract", "title"], label="Review Mode", value="abstract")
                    tbl = gr.Dataframe(interactive=True)
                    gr.Markdown("**💡 Editorial Note:** Reject a label by setting "
                                "**Approve** = 'no' and filling **Rename To**.")
                    with gr.Row():
                        refresh_tbl_btn = gr.Button("🔄 Sync Data", variant="secondary")
                        sub_btn = gr.Button("Commit & Advance", variant="primary")
                    refresh_tbl_btn.click(_load_table, [mode_btn, ph_state], [tbl])

                with gr.Tab("📊 Abstraction Vectors"):
                    with gr.Row():
                        cm = gr.Dropdown(["abstract", "title"], value="abstract", label="Mode", scale=1)
                        ct = gr.Dropdown(["intertopic", "heatmap", "dendrogram"], value="intertopic",
                                         label="Vector Chart Type", scale=2)
                    chart_out = gr.HTML()
                    cm.change(_show_chart, [cm, ct], [chart_out])
                    ct.change(_show_chart, [cm, ct], [chart_out])

                with gr.Tab("📄 Synthesis Report"):
                    btn_report = gr.Button("Generate Narrative", variant="primary")
                    report_box = gr.Markdown("### 📄 Narrative Report\n(Waiting for Phase 6 completion)")
                    btn_report.click(_get_report, [], [report_box])

                with gr.Tab("📥 Archival Exports"):
                    dl = gr.File(label="Artifacts", file_count="multiple", interactive=False)
                    btn_refresh_files = gr.Button("🔄 Sync Archive", variant="secondary")
                    btn_refresh_files.click(_get_files, [], [dl])

    # Event Bindings
    start_btn.click(init_analysis, [input_box, csv_up, chatbot],
                    [input_box, chatbot, tbl, prg_display])
    send_btn.click(handle_chat, [msg_in, chatbot, mode_btn, ph_state], [msg_in, chatbot, tbl])

    # Quick actions: each button sends a fixed prompt through handle_chat()
    # with the "abstract" mode and the "CODES" phase.
    for quick_btn, prompt in (
        (btn_ingest, "Ingest all uploaded PDFs and Word documents into the system."),
        (btn_scratch, "Check the local scratch folder and import any papers found there."),
        (btn_analyze, "run abstract only"),
    ):
        quick_btn.click(
            handle_chat,
            [gr.State(prompt), chatbot, gr.State("abstract"), gr.State("CODES")],
            [msg_in, chatbot, tbl],
        )

    # After a review is committed, refresh the report and then the phase
    # tracker so the sidebar follows the phase stored in ph_state.
    sub_btn.click(
        submit_review, [tbl, chatbot, mode_btn, ph_state], [chatbot, ph_state, tbl]
    ).then(
        _get_report, [], [report_box]
    ).then(
        get_phase_html, [ph_state], [prg_display]
    )

if __name__ == "__main__":
    # Theme and stylesheet are supplied at launch time.
    theme = gr.themes.Soft(primary_hue="indigo", neutral_hue="slate",
                           font=["Inter", "ui-sans-serif", "system-ui"])
    demo.launch(
        theme=theme,
        css=CSS
    )