Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,18 +37,25 @@ def _sanitize_text(s: str) -> str:
|
|
| 37 |
if not isinstance(s, str): return s
|
| 38 |
return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
|
|
|
| 42 |
prompt_for_planner = f"""
|
| 43 |
-
You are an expert data analysis project manager. Your task is to
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.
|
| 50 |
--- USER'S SCENARIO ---
|
| 51 |
{user_scenario}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
"""
|
| 53 |
structured_brief = cohere_chat(prompt_for_planner)
|
| 54 |
return structured_brief if structured_brief else user_scenario
|
|
@@ -78,35 +85,41 @@ def handle(user_msg: str, files: list) -> str:
|
|
| 78 |
|
| 79 |
if file_paths:
|
| 80 |
dataframes = []
|
|
|
|
| 81 |
for p in file_paths:
|
| 82 |
if p.endswith('.csv'):
|
| 83 |
try:
|
| 84 |
df = pd.read_csv(p)
|
| 85 |
dataframes.append(df)
|
|
|
|
| 86 |
except UnicodeDecodeError:
|
| 87 |
print(f"Warning: Failed to read {os.path.basename(p)} with UTF-8. Falling back to latin1 encoding.")
|
| 88 |
df = pd.read_csv(p, encoding='latin1')
|
| 89 |
dataframes.append(df)
|
|
|
|
| 90 |
|
| 91 |
if not dataframes: return "Please upload at least one CSV file."
|
| 92 |
|
|
|
|
|
|
|
|
|
|
| 93 |
llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
|
| 94 |
-
enhanced_prompt = _create_enhanced_prompt(safe_in)
|
| 95 |
|
| 96 |
AGENT_PREFIX = """
|
| 97 |
-
You are a data analysis agent. You have access to one or more pandas dataframes.
|
| 98 |
You MUST respond in one of two formats.
|
| 99 |
|
| 100 |
-
FORMAT 1: To perform a task.
|
| 101 |
-
Thought: Your step-by-step reasoning.
|
| 102 |
Action: python_repl_ast
|
| 103 |
-
Action Input: The Python code to run.
|
| 104 |
|
| 105 |
-
FORMAT 2: To give the final answer.
|
| 106 |
-
Thought: I have now
|
| 107 |
Final Answer: The complete answer, structured as the user requested.
|
| 108 |
|
| 109 |
-
CRITICAL RULE: NEVER
|
| 110 |
"""
|
| 111 |
|
| 112 |
agent = create_pandas_dataframe_agent(
|
|
@@ -131,7 +144,8 @@ TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
|
|
| 131 |
# ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
|
| 132 |
with gr.Blocks(theme="soft", css="style.css") as demo:
|
| 133 |
assessment_history = gr.State([])
|
| 134 |
-
|
|
|
|
| 135 |
# --- MODALS (POPUPS) DEFINED FIRST, INITIALLY HIDDEN ---
|
| 136 |
with gr.Group(visible=False) as privacy_modal:
|
| 137 |
with gr.Blocks():
|
|
@@ -162,7 +176,11 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
|
|
| 162 |
with gr.Column(scale=2):
|
| 163 |
with gr.Tabs():
|
| 164 |
with gr.TabItem("Current Assessment", id=0):
|
| 165 |
-
chat_history_output = gr.Chatbot(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
with gr.TabItem("Assessment History", id=1):
|
| 167 |
gr.Markdown("## Review Past Assessments")
|
| 168 |
history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
|
|
@@ -175,37 +193,28 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
|
|
| 175 |
terms_link = gr.Button("Terms of Service", variant="link")
|
| 176 |
|
| 177 |
# --- UI LOGIC ---
|
| 178 |
-
|
| 179 |
-
# THIS IS THE NEW, RESPONSIVE "RUN" FUNCTION
|
| 180 |
def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
|
| 181 |
if not prompt or not files:
|
| 182 |
gr.Warning("Please provide both a prompt and at least one data file.")
|
| 183 |
-
# We must yield the original state to prevent an error on empty run
|
| 184 |
yield chat_history_list, history_state_list, gr.update()
|
| 185 |
-
return
|
| 186 |
|
| 187 |
-
# 1. Immediately show the user's message and a "Thinking..." status
|
| 188 |
chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
|
| 189 |
thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Analyzing... Please wait. This may take a minute.\n```")
|
| 190 |
-
yield thinking_message, history_state_list, gr.update()
|
| 191 |
|
| 192 |
-
# 2. Call the powerful (and slow) backend engine
|
| 193 |
ai_response_text = handle(prompt, files)
|
| 194 |
|
| 195 |
-
# 3. Replace "Thinking..." with the final AI response
|
| 196 |
final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
|
| 197 |
|
| 198 |
-
# 4. Save the completed assessment to our history state
|
| 199 |
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 200 |
file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
|
| 201 |
|
| 202 |
new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
|
| 203 |
updated_history = history_state_list + [new_assessment]
|
| 204 |
|
| 205 |
-
# 5. Create user-friendly labels for the history dropdown
|
| 206 |
history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
|
| 207 |
|
| 208 |
-
# 6. Yield the final, complete state to the UI
|
| 209 |
yield final_chat, updated_history, gr.update(choices=history_labels)
|
| 210 |
|
| 211 |
def view_history(selection, history_state_list):
|
|
@@ -237,6 +246,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
|
|
| 237 |
terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
|
| 238 |
close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
|
| 239 |
|
|
|
|
| 240 |
if __name__ == "__main__":
|
| 241 |
if not os.getenv("COHERE_API_KEY"):
|
| 242 |
print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
|
|
|
|
| 37 |
if not isinstance(s, str): return s
|
| 38 |
return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
|
| 39 |
|
| 40 |
+
# THIS FUNCTION IS NOW UPGRADED
|
| 41 |
+
def _create_enhanced_prompt(user_scenario: str, file_context: str) -> str:
|
| 42 |
+
"""Uses an LLM to pre-process the user's prompt and adds critical data context."""
|
| 43 |
prompt_for_planner = f"""
|
| 44 |
+
You are an expert data analysis project manager. Your task is to create a clear, structured brief for a data analysis AI based on the user's scenario and the provided data context.
|
| 45 |
+
|
| 46 |
+
--- DATA CONTEXT ---
|
| 47 |
+
{file_context}
|
| 48 |
+
The dataframes are available in a list, indexed as df1, df2, and so on, in the order they are listed above. Your primary task is to use these dataframes to answer the user's questions. Do not use hypothetical data.
|
| 49 |
+
|
|
|
|
| 50 |
--- USER'S SCENARIO ---
|
| 51 |
{user_scenario}
|
| 52 |
+
|
| 53 |
+
--- YOUR TASK ---
|
| 54 |
+
Based on BOTH the user's scenario and the data context, extract the following:
|
| 55 |
+
1. Primary Objective: A one-sentence summary of the user's main goal.
|
| 56 |
+
2. Key Tasks: A numbered list of ALL specific questions the user wants answered using the provided data.
|
| 57 |
+
3. Required Output Format: A description of how the user wants the final answer structured.
|
| 58 |
+
CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.
|
| 59 |
"""
|
| 60 |
structured_brief = cohere_chat(prompt_for_planner)
|
| 61 |
return structured_brief if structured_brief else user_scenario
|
|
|
|
| 85 |
|
| 86 |
if file_paths:
|
| 87 |
dataframes = []
|
| 88 |
+
file_names = []
|
| 89 |
for p in file_paths:
|
| 90 |
if p.endswith('.csv'):
|
| 91 |
try:
|
| 92 |
df = pd.read_csv(p)
|
| 93 |
dataframes.append(df)
|
| 94 |
+
file_names.append(os.path.basename(p))
|
| 95 |
except UnicodeDecodeError:
|
| 96 |
print(f"Warning: Failed to read {os.path.basename(p)} with UTF-8. Falling back to latin1 encoding.")
|
| 97 |
df = pd.read_csv(p, encoding='latin1')
|
| 98 |
dataframes.append(df)
|
| 99 |
+
file_names.append(os.path.basename(p))
|
| 100 |
|
| 101 |
if not dataframes: return "Please upload at least one CSV file."
|
| 102 |
|
| 103 |
+
# Create the crucial file context string
|
| 104 |
+
file_context_string = "The user has provided the following data files for your analysis: " + ", ".join(file_names)
|
| 105 |
+
|
| 106 |
llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
|
| 107 |
+
enhanced_prompt = _create_enhanced_prompt(safe_in, file_context_string)
|
| 108 |
|
| 109 |
AGENT_PREFIX = """
|
| 110 |
+
You are a data analysis agent. You have access to one or more pandas dataframes. Your task is to use the provided dataframes to answer the user's questions.
|
| 111 |
You MUST respond in one of two formats.
|
| 112 |
|
| 113 |
+
FORMAT 1: To perform a task.
|
| 114 |
+
Thought: Your step-by-step reasoning for using the data.
|
| 115 |
Action: python_repl_ast
|
| 116 |
+
Action Input: The Python code to run on the dataframes (df1, df2, etc.).
|
| 117 |
|
| 118 |
+
FORMAT 2: To give the final answer.
|
| 119 |
+
Thought: I have now completed all the tasks and can provide the final report based on the real data.
|
| 120 |
Final Answer: The complete answer, structured as the user requested.
|
| 121 |
|
| 122 |
+
CRITICAL RULE: NEVER use hypothetical data. ALWAYS use the provided dataframes to generate your results.
|
| 123 |
"""
|
| 124 |
|
| 125 |
agent = create_pandas_dataframe_agent(
|
|
|
|
| 144 |
# ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
|
| 145 |
with gr.Blocks(theme="soft", css="style.css") as demo:
|
| 146 |
assessment_history = gr.State([])
|
| 147 |
+
# ... (The rest of the UI code is identical to the last version) ...
|
| 148 |
+
# ... (For brevity, I will omit it, but you should use the full UI code from the previous step)
|
| 149 |
# --- MODALS (POPUPS) DEFINED FIRST, INITIALLY HIDDEN ---
|
| 150 |
with gr.Group(visible=False) as privacy_modal:
|
| 151 |
with gr.Blocks():
|
|
|
|
| 176 |
with gr.Column(scale=2):
|
| 177 |
with gr.Tabs():
|
| 178 |
with gr.TabItem("Current Assessment", id=0):
|
| 179 |
+
chat_history_output = gr.Chatbot(
|
| 180 |
+
label="Analysis Output",
|
| 181 |
+
type="messages",
|
| 182 |
+
height=600
|
| 183 |
+
)
|
| 184 |
with gr.TabItem("Assessment History", id=1):
|
| 185 |
gr.Markdown("## Review Past Assessments")
|
| 186 |
history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
|
|
|
|
| 193 |
terms_link = gr.Button("Terms of Service", variant="link")
|
| 194 |
|
| 195 |
# --- UI LOGIC ---
|
|
|
|
|
|
|
| 196 |
def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
    """Generator driving one assessment: echoes the user's message with a
    progress placeholder, runs the backend, then yields the final chat,
    updated history state, and refreshed history-dropdown choices."""
    # Guard clause: both a prompt and files are required to run.
    if not prompt or not files:
        gr.Warning("Please provide both a prompt and at least one data file.")
        # Yield current state unchanged so the UI outputs stay valid.
        yield chat_history_list, history_state_list, gr.update()
        return

    # Immediately surface the user's message plus a "thinking" placeholder.
    chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
    thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Analyzing... Please wait. This may take a minute.\n```")
    yield thinking_message, history_state_list, gr.update()

    # Slow backend call produces the analysis text.
    ai_response_text = handle(prompt, files)

    # Swap the placeholder for the real assistant response.
    final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)

    # Record this assessment in the session history.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]

    new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
    updated_history = history_state_list + [new_assessment]

    # Human-readable dropdown labels: timestamp plus truncated prompt.
    history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]

    yield final_chat, updated_history, gr.update(choices=history_labels)
|
| 219 |
|
| 220 |
def view_history(selection, history_state_list):
|
|
|
|
| 246 |
terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
|
| 247 |
close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
|
| 248 |
|
| 249 |
+
|
| 250 |
if __name__ == "__main__":
|
| 251 |
if not os.getenv("COHERE_API_KEY"):
|
| 252 |
print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
|