Spaces:
Sleeping
Sleeping
File size: 8,030 Bytes
aae6699 2fccbc6 40db972 325f883 c99015b 38cbb08 c99015b 2fccbc6 dddc062 2fccbc6 dddc062 325f883 c1ff5e2 ff957d1 325f883 d5495e2 325f883 2fccbc6 325f883 2fccbc6 325f883 2fccbc6 325f883 c90a683 2fccbc6 c90a683 2fccbc6 c90a683 325f883 c90a683 2fccbc6 c90a683 dddc062 c90a683 2fccbc6 38cbb08 2fccbc6 a2b1fdb 2fccbc6 c90a683 2fccbc6 325f883 aae6699 a2b1fdb 325f883 2fccbc6 325f883 c99015b 2b74cfe 38cbb08 c99015b c90a683 38cbb08 c90a683 c99015b c90a683 84136c9 aec014b 84136c9 c7867b9 c99015b 38cbb08 c99015b a2b1fdb 325f883 38cbb08 a2b1fdb 38cbb08 a2b1fdb 325f883 2fccbc6 325f883 c90a683 dddc062 c99015b 2fccbc6 c99015b 2fccbc6 c90a683 2fccbc6 dddc062 c90a683 2fccbc6 325f883 7c0897e 325f883 c99015b 2fccbc6 325f883 c99015b dddc062 2fccbc6 dddc062 38cbb08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# app.py
from __future__ import annotations
import os
import traceback
import regex as re2
from typing import List, Tuple, Dict, Any
import gradio as gr
import pandas as pd
# New additions for data analysis agent
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_cohere import ChatCohere # <-- NEW, CORRECT IMPORT
# ---- Local modules
from settings import (
HEALTHCARE_SETTINGS, GENERAL_CONVERSATION_PROMPT, USE_SCENARIO_ENGINE, DEBUG_PLAN,
COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
)
from audit_log import log_event
from privacy import safety_filter, refusal_reply
from data_registry import DataRegistry
from upload_ingest import extract_text_from_files
from healthcare_analysis import HealthcareAnalyzer
from scenario_planner import parse_to_plan
from scenario_engine import ScenarioEngine
from rag import RAGIndex
from llm_router import generate_narrative, cohere_chat, open_fallback_chat, _co_client, cohere_embed
from narrative_safetynet import build_narrative
# ---------------- Utilities ----------------
def _sanitize_text(s: str) -> str:
if not isinstance(s, str):
return s
return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
# --- NEW: The "Intake Analyst" AI ---
def _create_enhanced_prompt(user_scenario: str) -> str:
    """Ask the LLM to turn a free-form scenario into a structured analysis brief.

    The scenario is embedded in a fixed "project manager" instruction and sent
    through ``cohere_chat``. The model's answer is what the data-analysis agent
    later receives as its input.

    Args:
        user_scenario: The user's scenario text, inserted verbatim at the end
            of the instruction.

    Returns:
        The brief returned by ``cohere_chat``; if that call yields a falsy
        value (e.g. an empty string or ``None``), the unmodified
        *user_scenario* is returned instead.
    """
    planner_request = f"""
You are an expert data analysis project manager. Your task is to read the user's unstructured scenario below and create a clear, structured brief for a data analysis AI.
From the user's text, extract the following:
1. **Primary Objective:** A one-sentence summary of the user's main goal.
2. **Key Tasks:** A numbered list of the specific questions the user wants answered.
3. **Expert Guidelines & Assumptions:** A bulleted list of EVERY specific number, metric, calculation method, or assumption mentioned in the text. This is critical for high-quality analysis.
4. **Required Output Format:** A description of how the user wants the final answer to be structured.
Present this as a clean brief. Then, include the user's original text at the end.
--- USER'S SCENARIO ---
{user_scenario}
"""
    brief = cohere_chat(planner_request)
    # Fall back to the raw scenario so the agent always gets some input.
    return brief if brief else user_scenario
def is_healthcare_scenario(text: str, has_files: bool) -> bool:
    """Decide whether a message should be treated as a healthcare scenario.

    A message qualifies only when files were uploaded AND the lower-cased text
    contains either one of the structured-section words ("background",
    "situation", "tasks", "deliverables") or one of the keywords listed under
    ``HEALTHCARE_SETTINGS["healthcare_keywords"]``. Matching is by plain
    substring.

    Args:
        text: The user's message; ``None`` or empty is treated as "".
        has_files: Whether the user uploaded any files.

    Returns:
        *has_files* itself when it is falsy, otherwise whether any marker or
        keyword was found.
    """
    lowered = text.lower() if text else ""
    # Keywords are compared as stored against lower-cased text — presumably
    # they are kept lower-case in settings; verify there.
    keywords = HEALTHCARE_SETTINGS["healthcare_keywords"]
    section_markers = ("background", "situation", "tasks", "deliverables")
    has_sections = any(marker in lowered for marker in section_markers)
    return has_files and (has_sections or any(kw in lowered for kw in keywords))
def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
return (history_messages or []) + [{"role": role, "content": content}]
def ping_cohere() -> str:
    """Check Cohere connectivity with a tiny embeddings request.

    Returns:
        A human-readable status line: a success message naming the configured
        model and timeout, or a description of what went wrong. Any
        ``Exception`` raised along the way is reported in the returned string
        rather than propagated.
    """
    try:
        if not _co_client():
            return "Cohere client not initialized. Is COHERE_API_KEY set?"
        vectors = cohere_embed(["hello", "world"])
        # Two inputs were sent, so exactly two vectors are expected back.
        if not vectors or len(vectors) != 2:
            return "Cohere reachable, but embeddings returned no vectors."
        return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)"
    except Exception as e:
        return f"Cohere ping failed: {e}"
# ---------------- Core handler ----------------
def _finish_turn(
    history_messages: List[Dict[str, str]], user_msg: str, reply: str
) -> Tuple[List[Dict[str, str]], str]:
    """Record one user/assistant exchange and build ``handle``'s return tuple."""
    new_hist = _append_msg(history_messages, "user", user_msg)
    new_hist = _append_msg(new_hist, "assistant", reply)
    return new_hist, ""


def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -> Tuple[List[Dict[str, str]], str]:
    """Process one chat turn and return the updated history.

    Flow:
      1. The message goes through ``safety_filter`` (input mode); a blocked
         message is answered with ``refusal_reply``.
      2. If files were uploaded, every ``.csv`` file (extension matched
         case-insensitively) is loaded with pandas and handed to a pandas
         dataframe agent, whose input is the brief produced by
         ``_create_enhanced_prompt``.
      3. Without files, the message is answered as general conversation via
         ``cohere_chat``, falling back to ``open_fallback_chat`` and then to a
         fixed sentence.

    Args:
        user_msg: Raw text typed by the user; stored in the history as typed.
        history_messages: Prior chat messages as ``{"role", "content"}`` dicts
            (may be ``None``/empty).
        files: Uploaded files — either path strings or objects exposing a
            ``name`` attribute holding the path.

    Returns:
        ``(new_history, "")``. The second element is always an empty string.

    Errors are logged through ``log_event`` and reported to the user as a
    short message. The traceback is logged only and is deliberately not
    echoed into the chat, matching the agent-error path.
    """
    try:
        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
        if blocked_in:
            return _finish_turn(history_messages, user_msg, refusal_reply(reason_in))

        file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
        if file_paths:
            try:
                # Compare the extension case-insensitively so "DATA.CSV" is accepted.
                dataframes = [
                    pd.read_csv(p)
                    for p in file_paths
                    if str(p).lower().endswith(".csv")
                ]
                if not dataframes:
                    return _finish_turn(
                        history_messages, user_msg, "Please upload at least one CSV file."
                    )

                llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
                enhanced_prompt = _create_enhanced_prompt(safe_in)
                # NOTE(review): in this view the prefix is the literal "...";
                # the original comment says "Prefix content remains the same",
                # so the real text was presumably elided — restore it before
                # relying on the agent's behavior.
                AGENT_PREFIX = """..."""
                # NOTE(security): allow_dangerous_code=True opts in to the
                # agent's Python execution tool (see LangChain docs), so
                # model-generated code driven by user-supplied prompts runs in
                # this process. Confirm the deployment is sandboxed.
                agent = create_pandas_dataframe_agent(
                    llm,
                    dataframes,
                    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                    verbose=True,
                    allow_dangerous_code=True,
                    prefix=AGENT_PREFIX,
                )
                result = agent.invoke({"input": enhanced_prompt})
                reply = _sanitize_text(result.get("output", "No output generated."))
            except Exception as e:
                tb = traceback.format_exc()
                log_event("agent_error", None, {"err": str(e), "tb": tb})
                reply = f"An error occurred while analyzing the data: {e}"
        else:
            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
            reply = cohere_chat(prompt) or open_fallback_chat(prompt) or "How can I help further?"
            reply = _sanitize_text(reply)

        return _finish_turn(history_messages, user_msg, reply)
    except Exception as e:
        tb = traceback.format_exc()
        log_event("app_error", None, {"err": str(e), "tb": tb})
        # The traceback stays in the audit log; showing it in the chat would
        # expose file paths and internals to end users.
        return _finish_turn(history_messages, user_msg, f"A critical error occurred: {e}")
# ---------------- UI ----------------
with gr.Blocks(analytics_enabled=False) as demo:
    # NOTE(review): the indentation of this section was not available, so the
    # Row membership below is a best-guess reconstruction; component creation
    # order is unchanged. Confirm against the deployed layout.
    gr.Markdown("## Universal AI Data Analyst")

    with gr.Row():
        chat = gr.Chatbot(label="Chat History", type="messages", height=520)
        files = gr.Files(
            label="Upload Data Files (CSV recommended)",
            file_count="multiple",
            type="filepath",
            file_types=[".csv"],
        )

    msg = gr.Textbox(label="Prompt", placeholder="Paste your scenario, tasks, and any specific instructions here.")

    with gr.Row():
        send = gr.Button("Send")
        clear = gr.Button("Clear")
        ping_btn = gr.Button("Ping Cohere")
    ping_out = gr.Markdown()

    def _on_send(m, h, f):
        """Run one chat turn; return the new history and clear the prompt box."""
        updated_history = handle(m, h, f or [])[0]
        return updated_history, ""

    # The Send button and pressing Enter in the textbox trigger the same handler.
    for register in (send.click, msg.submit):
        register(_on_send, inputs=[msg, chat, files], outputs=[chat, msg])

    # Clear resets the chat, the prompt and the uploaded files.
    clear.click(lambda: ([], "", None), outputs=[chat, msg, files])
    ping_btn.click(lambda: ping_cohere(), outputs=[ping_out])
if __name__ == "__main__":
    # Read the key once; it drives both the console warning and the startup log.
    cohere_key_present = bool(os.getenv("COHERE_API_KEY"))
    if not cohere_key_present:
        print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")

    startup_details = {
        "cohere_key_present": cohere_key_present,
        "cohere_model": COHERE_MODEL_PRIMARY,
        "open_fallbacks": USE_OPEN_FALLBACKS,
        "timeout_s": COHERE_TIMEOUT_S,
    }
    log_event("startup", None, startup_details)

    # Listen on all interfaces; the port comes from PORT, defaulting to 7860.
    listen_port = int(os.getenv("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)
|