File size: 10,166 Bytes
aae6699
2fccbc6
 
 
 
 
 
40db972
325f883
3d0c21e
325f883
2f7a273
c99015b
8d366c3
c99015b
2f7a273
 
 
dddc062
2fccbc6
 
dddc062
325f883
 
2f7a273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d0c21e
 
 
 
d28faca
3d0c21e
 
 
 
2f7a273
 
3d0c21e
 
 
 
 
2f7a273
3d0c21e
 
2f7a273
3d0c21e
 
2f7a273
 
 
 
 
3d0c21e
 
 
 
 
 
2f7a273
 
3d0c21e
2f7a273
3d0c21e
 
 
2f7a273
3d0c21e
 
 
 
 
 
 
 
 
2f7a273
3d0c21e
 
2f7a273
 
 
 
 
 
 
3d0c21e
2f7a273
 
3d0c21e
2f7a273
3d0c21e
 
 
 
2f7a273
 
3d0c21e
2f7a273
3d0c21e
2f7a273
3d0c21e
 
 
 
2f7a273
 
3d0c21e
2f7a273
3d0c21e
 
2f7a273
 
 
 
 
 
 
 
 
 
 
 
3d0c21e
 
 
 
2f7a273
 
 
3d0c21e
 
 
 
 
 
 
 
2f7a273
2fccbc6
325f883
 
c99015b
 
2f7a273
8d366c3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# app.py
from __future__ import annotations
import os
import traceback
import regex as re2
from typing import List, Tuple, Dict, Any

import gradio as gr
import pandas as pd
from datetime import datetime

# --- BACKEND IMPORTS ---
from langchain.agents.agent_types import AgentType
from langchain_cohere import ChatCohere
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

# --- LOCAL MODULE IMPORTS ---
# (Assuming these files exist in your project)
from settings import (
    HEALTHCARE_SETTINGS, GENERAL_CONVERSATION_PROMPT, USE_SCENARIO_ENGINE, DEBUG_PLAN,
    COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
)
from audit_log import log_event
from privacy import safety_filter, refusal_reply
from llm_router import cohere_chat, _co_client, cohere_embed

# --- BACKEND UTILITY FUNCTIONS ---

def _sanitize_text(s: str) -> str:
    if not isinstance(s, str):
        return s
    return re2.sub(r'[\p{C}--[\n\t]]+', '', s)

def _create_enhanced_prompt(user_scenario: str) -> str:
    """Ask the LLM to turn the user's free-form scenario into a structured brief.

    The scenario is embedded in a planner prompt and sent through
    ``cohere_chat``. If that call yields a falsy result, the original
    scenario text is returned unchanged.
    """
    planner_prompt = f"""
You are an expert data analysis project manager. Your task is to read the user's unstructured scenario below and create a clear, structured brief for a data analysis AI.

From the user's text, extract the following:
1.  **Primary Objective:** A one-sentence summary of the user's main goal.
2.  **Key Tasks:** A numbered list of ALL the specific questions the user wants answered.
3.  **Expert Guidelines & Assumptions:** A bulleted list of any specific numbers, metrics, or calculation methods mentioned.
4.  **Required Output Format:** A description of how the user wants the final answer structured.

CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.

--- USER'S SCENARIO ---
{user_scenario}
"""
    brief = cohere_chat(planner_prompt)
    if brief:
        return brief
    # Planner produced nothing usable: fall back to the raw scenario.
    return user_scenario

def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
    return (history_messages or []) + [{"role": role, "content": content}]

def ping_cohere() -> str:
    """Probe Cohere with a two-item embedding request and describe the result.

    Returns a human-readable status string in every case; any exception is
    caught and reported in the returned text rather than raised.
    """
    try:
        if not _co_client():
            return "Cohere client not initialized. Is COHERE_API_KEY set?"
        embeddings = cohere_embed(["hello", "world"])
        # Success means exactly one vector came back per input string.
        if not (embeddings and len(embeddings) == 2):
            return "Cohere reachable, but embeddings returned no vectors."
        return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)"
    except Exception as exc:
        return f"Cohere ping failed: {exc}"

# --- THE CORE ANALYSIS ENGINE ---

def handle(user_msg: str, files: list) -> str:
    """
    Run the backend for one user request and return only the reply text.

    The message first goes through ``safety_filter`` (input mode); a blocked
    message short-circuits to ``refusal_reply``. With files, every path whose
    name ends in ``.csv`` (case-insensitive) is loaded with pandas and a
    LangChain pandas-dataframe agent answers an LLM-structured version of the
    request. Without files, the request is answered by ``cohere_chat`` using
    ``GENERAL_CONVERSATION_PROMPT``.

    Args:
        user_msg: Raw text entered by the user.
        files: Uploaded files, either path strings or objects with a ``name``
            attribute (presumably the path on disk; verify against caller).
            ``None`` or an empty list selects general conversation mode.

    Returns:
        The sanitized reply, a refusal message, a request to upload a CSV, or
        ``"A critical error occurred: ..."`` if anything raised. Exceptions
        are logged through ``log_event`` and never propagate to the caller.
    """
    try:
        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
        if blocked_in:
            return refusal_reply(reason_in)

        file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]

        if not file_paths:
            # General conversation mode if no files are uploaded
            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
            reply = cohere_chat(prompt) or "How can I help further?"
            return _sanitize_text(reply)

        # Compare the extension case-insensitively so uploads such as
        # "DATA.CSV" are not rejected as "not a CSV".
        dataframes = [
            pd.read_csv(p) for p in file_paths if str(p).lower().endswith('.csv')
        ]
        if not dataframes:
            return "Please upload at least one CSV file."

        llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
        enhanced_prompt = _create_enhanced_prompt(safe_in)

        AGENT_PREFIX = """
You are a data analysis agent. You have access to one or more pandas dataframes.
You MUST respond in one of two formats.

FORMAT 1: To perform a task. Your response must be a single block of text with ONLY these three sections:
Thought: Your step-by-step reasoning.
Action: python_repl_ast
Action Input: The Python code to run.

FORMAT 2: To give the final answer. Your response must be a single block of text with ONLY these two sections:
Thought: I have now answered all the user's questions and can provide the final report.
Final Answer: The complete answer, structured as the user requested.

CRITICAL RULE: NEVER combine `Action` and `Final Answer` in the same response. Choose one format.
"""

        # SECURITY: allow_dangerous_code=True lets the agent execute
        # LLM-generated Python in this process, driven by user-supplied text
        # and file contents. Only deploy this in a sandboxed environment.
        agent = create_pandas_dataframe_agent(
            llm,
            dataframes,
            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            allow_dangerous_code=True,
            prefix=AGENT_PREFIX,
            max_iterations=50
        )

        result = agent.invoke({"input": enhanced_prompt})
        return _sanitize_text(result.get("output", "No output generated."))

    except Exception as e:
        # Top-level boundary: record the full traceback, show the user a short message.
        tb = traceback.format_exc()
        log_event("app_error", None, {"err": str(e), "tb": tb})
        return f"A critical error occurred: {e}"

# ---------------- THE NEW PROFESSIONAL UI ----------------
with gr.Blocks(theme="soft", css="style.css") as demo:
    # State to store the history of all assessments in this session
    assessment_history = gr.State([])

    gr.Markdown("# ClarityOps Augmented Decision Tool")

    with gr.Row(variant="panel"):
        # --- LEFT COLUMN: CONTROLS ---
        with gr.Column(scale=1):
            gr.Markdown("## New Assessment")
            files_input = gr.Files(
                label="Upload Data Files (CSV recommended)",
                file_count="multiple",
                type="filepath",
                file_types=[".csv"]
            )
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Paste your scenario, tasks, and any specific instructions here.",
                lines=15
            )
            with gr.Row():
                send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
                clear_btn = gr.Button("🗑️ Clear")

            ping_btn = gr.Button("Ping Cohere")
            ping_out = gr.Markdown()

        # --- RIGHT COLUMN: RESULTS & HISTORY ---
        with gr.Column(scale=2):
            with gr.Tabs():
                # --- TAB 1: CURRENT ASSESSMENT ---
                with gr.TabItem("Current Assessment", id=0):
                    # _append_msg builds {"role", "content"} dicts, i.e. the
                    # "messages" format, so the Chatbot must be declared with
                    # that type; the tuple format rejects dict entries.
                    chat_history_output = gr.Chatbot(
                        label="Analysis Output",
                        type="messages",
                        bubble_full_width=True,
                        height=600
                    )
                # --- TAB 2: ASSESSMENT HISTORY ---
                with gr.TabItem("Assessment History", id=1):
                    gr.Markdown("## Review Past Assessments")
                    history_dropdown = gr.Dropdown(
                        label="Select an assessment to review",
                        choices=[]
                    )
                    history_display = gr.Markdown(
                        label="Selected Assessment Details"
                    )

    # --- UI LOGIC ---
    def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
        """Run one assessment and return (chat, history state, dropdown update).

        Warns and leaves everything unchanged when the prompt or the file
        list is missing.
        """
        if not prompt or not files:
            gr.Warning("Please provide both a prompt and at least one data file.")
            return chat_history_list, history_state_list, gr.update()

        # 1. Append the user's message to the chat
        chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)

        # 2. Call the backend engine to get the AI response
        ai_response_text = handle(prompt, files)

        # 3. Append the AI's response to the chat
        final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)

        # 4. Save the completed assessment to our history state
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        file_names = [os.path.basename(f) for f in files]

        new_assessment = {
            "id": timestamp, "prompt": prompt, "files": file_names,
            "response": ai_response_text
        }
        # Tolerate a missing (None) state instead of failing on None + list.
        updated_history = (history_state_list or []) + [new_assessment]

        # 5. Create user-friendly labels for the history dropdown.
        #    view_history recovers the id from the text before " - ".
        history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]

        return final_chat, updated_history, gr.update(choices=history_labels)

    def view_history(selection, history_state_list):
        """Render the stored assessment matching the selected dropdown label as Markdown."""
        if not selection or not history_state_list:
            return ""
        selected_id = selection.split(" - ")[0]
        selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)

        if selected_assessment:
            file_list_md = "\n- ".join(selected_assessment['files'])
            return f"""
### Assessment from: {selected_assessment['id']}
**Files Used:**
- {file_list_md}
---
**Original Prompt:**
> {selected_assessment['prompt']}
---
**AI Generated Response:**
{selected_assessment['response']}
"""
        return "Could not find the selected assessment."

    def clear_session():
        """Reset inputs, chat and stored history, plus the history tab widgets.

        The dropdown and detail pane are cleared together with the state so
        they never list assessments that no longer exist.
        """
        return None, None, [], [], gr.update(choices=[], value=None), ""

    # Wire up the components
    send_btn.click(
        run_analysis_wrapper,
        inputs=[prompt_input, files_input, chat_history_output, assessment_history],
        outputs=[chat_history_output, assessment_history, history_dropdown]
    )

    history_dropdown.change(
        view_history,
        inputs=[history_dropdown, assessment_history],
        outputs=[history_display]
    )

    clear_btn.click(
        clear_session,
        outputs=[
            prompt_input, files_input, chat_history_output,
            assessment_history, history_dropdown, history_display,
        ]
    )
    ping_btn.click(ping_cohere, outputs=[ping_out])

if __name__ == "__main__":
    # Warn early (but do not abort) when the Cohere credential is missing.
    api_key = os.getenv("COHERE_API_KEY")
    if not api_key:
        print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")

    # Listen on all interfaces; the port comes from PORT, defaulting to 7860.
    listen_port = int(os.getenv("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)