Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,24 +37,30 @@ def _sanitize_text(s: str) -> str:
|
|
| 37 |
if not isinstance(s, str): return s
|
| 38 |
return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
|
| 39 |
|
|
|
|
| 40 |
def _create_enhanced_prompt(user_scenario: str, file_context: str) -> str:
|
| 41 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 42 |
prompt_for_planner = f"""
|
| 43 |
-
You are
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
--- DATA CONTEXT ---
|
| 46 |
{file_context}
|
| 47 |
-
The
|
| 48 |
|
| 49 |
--- USER'S SCENARIO ---
|
| 50 |
{user_scenario}
|
| 51 |
|
| 52 |
--- YOUR TASK ---
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
CRITICAL INSTRUCTION: Tell the data analyst that it MUST answer ALL of the key tasks before providing its final answer.
|
| 58 |
"""
|
| 59 |
structured_brief = cohere_chat(prompt_for_planner)
|
| 60 |
return structured_brief if structured_brief else user_scenario
|
|
@@ -68,7 +74,7 @@ def ping_cohere() -> str:
|
|
| 68 |
cli = _co_client()
|
| 69 |
if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
|
| 70 |
vecs = cohere_embed(["hello", "world"])
|
| 71 |
-
return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable
|
| 72 |
except Exception as e:
|
| 73 |
return f"Cohere ping failed: {e}"
|
| 74 |
|
|
@@ -92,41 +98,41 @@ def handle(user_msg: str, files: list) -> str:
|
|
| 92 |
dataframes.append(df)
|
| 93 |
file_names.append(os.path.basename(p))
|
| 94 |
except UnicodeDecodeError:
|
| 95 |
-
print(f"Warning:
|
| 96 |
df = pd.read_csv(p, encoding='latin1')
|
| 97 |
dataframes.append(df)
|
| 98 |
file_names.append(os.path.basename(p))
|
| 99 |
|
| 100 |
if not dataframes: return "Please upload at least one CSV file."
|
| 101 |
|
| 102 |
-
file_context_string = "The user has provided the following data files
|
| 103 |
llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
|
| 104 |
enhanced_prompt = _create_enhanced_prompt(safe_in, file_context_string)
|
| 105 |
|
| 106 |
-
# --- THE FINAL
|
| 107 |
AGENT_PREFIX = """
|
| 108 |
-
Your job is to
|
| 109 |
-
You
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
| 123 |
"""
|
| 124 |
|
| 125 |
agent = create_pandas_dataframe_agent(
|
| 126 |
llm, dataframes, agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
| 127 |
verbose=True, allow_dangerous_code=True, prefix=AGENT_PREFIX, max_iterations=50,
|
| 128 |
-
|
| 129 |
-
handle_parsing_errors=True
|
| 130 |
)
|
| 131 |
result = agent.invoke({"input": enhanced_prompt})
|
| 132 |
return _sanitize_text(result.get("output", "No output generated."))
|
|
@@ -146,7 +152,7 @@ TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
|
|
| 146 |
# ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
|
| 147 |
with gr.Blocks(theme="soft", css="style.css") as demo:
|
| 148 |
assessment_history = gr.State([])
|
| 149 |
-
# ... (The rest of the UI code is identical to the last version) ...
|
| 150 |
with gr.Group(visible=False) as privacy_modal:
|
| 151 |
with gr.Blocks():
|
| 152 |
gr.Markdown(PRIVACY_POLICY_TEXT)
|
|
@@ -188,7 +194,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
|
|
| 188 |
return
|
| 189 |
|
| 190 |
chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
|
| 191 |
-
thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠
|
| 192 |
yield thinking_message, history_state_list, gr.update()
|
| 193 |
ai_response_text = handle(prompt, files)
|
| 194 |
final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
|
|
|
|
| 37 |
if not isinstance(s, str): return s
|
| 38 |
return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
|
| 39 |
|
| 40 |
+
# --- Planning stage: have a "Senior Analyst" LLM draft the agent's work plan ---
def _create_enhanced_prompt(user_scenario: str, file_context: str) -> str:
    """Turn a free-form user scenario into a step-by-step plan for the agent.

    Sends a planning prompt (built from *file_context* and *user_scenario*)
    to the Cohere chat helper and returns the resulting brief. If the LLM
    call yields nothing falsy, the raw *user_scenario* is returned unchanged
    so the pipeline always has a usable prompt.
    """
    planner_prompt = f"""
You are a Senior Data Analyst. Your job is to create a clear, step-by-step execution plan for a Junior AI Data Analyst.
The user has provided a complex scenario and a list of data files. The Junior Analyst gets confused by long prompts and can get stuck in loops.

Your plan must be simple, clear, and sequential.

--- DATA CONTEXT ---
{file_context}
The Junior Analyst has access to these files in a list of pandas dataframes (df1, df2, etc.), in the order listed above.

--- USER'S SCENARIO ---
{user_scenario}

--- YOUR TASK ---
Create a "Step-by-Step Execution Plan" for the Junior Analyst. Tell it exactly what to do, one task at a time, referencing the correct dataframe (df1, df2, etc.).
Instruct it to perform all data preparation first, then the analysis, then the recommendations.
Tell it that it MUST complete ALL steps in the plan before providing the final report.
This plan will be given to the Junior Analyst. Make it easy to follow.
"""
    # Fall back to the original scenario when the planner returns nothing.
    plan = cohere_chat(planner_prompt)
    return plan or user_scenario
|
|
|
|
| 74 |
cli = _co_client()
|
| 75 |
if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
|
| 76 |
vecs = cohere_embed(["hello", "world"])
|
| 77 |
+
return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable."
|
| 78 |
except Exception as e:
|
| 79 |
return f"Cohere ping failed: {e}"
|
| 80 |
|
|
|
|
| 98 |
dataframes.append(df)
|
| 99 |
file_names.append(os.path.basename(p))
|
| 100 |
except UnicodeDecodeError:
|
| 101 |
+
print(f"Warning: Reading {os.path.basename(p)} with fallback latin1 encoding.")
|
| 102 |
df = pd.read_csv(p, encoding='latin1')
|
| 103 |
dataframes.append(df)
|
| 104 |
file_names.append(os.path.basename(p))
|
| 105 |
|
| 106 |
if not dataframes: return "Please upload at least one CSV file."
|
| 107 |
|
| 108 |
+
file_context_string = "The user has provided the following data files: " + ", ".join(file_names)
|
| 109 |
llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
|
| 110 |
enhanced_prompt = _create_enhanced_prompt(safe_in, file_context_string)
|
| 111 |
|
| 112 |
+
# --- THE FINAL FIX (PART 2): Stricter Agent with Error Handling Rule ---
|
| 113 |
AGENT_PREFIX = """
|
| 114 |
+
You are a Junior AI Data Analyst. Your job is to execute the step-by-step plan provided by your Senior Analyst using Python and pandas.
|
| 115 |
+
You have access to dataframes named df1, df2, etc.
|
| 116 |
+
|
| 117 |
+
You MUST follow these rules:
|
| 118 |
+
|
| 119 |
+
1. **EXECUTE THE PLAN:** Follow the execution plan exactly, one step at a time.
|
| 120 |
+
2. **FORMATTING:** Your response MUST be in one of two formats. NEVER mix them.
|
| 121 |
+
* **To run code:**
|
| 122 |
+
Thought: Your reasoning for the code you are about to run to complete the current step.
|
| 123 |
+
Action: python_repl_ast
|
| 124 |
+
Action Input: The single line of python code to run.
|
| 125 |
+
* **To give the final answer:**
|
| 126 |
+
Thought: I have finished all steps in the plan and can now provide the final report.
|
| 127 |
+
Final Answer: The complete, final answer, formatted as a concise report.
|
| 128 |
+
|
| 129 |
+
3. **ERROR HANDLING:** If your code produces an error, DO NOT try the same code again. Analyze the error message and try a DIFFERENT approach to solve the step. If you are stuck, say so.
|
| 130 |
"""
|
| 131 |
|
| 132 |
agent = create_pandas_dataframe_agent(
|
| 133 |
llm, dataframes, agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
| 134 |
verbose=True, allow_dangerous_code=True, prefix=AGENT_PREFIX, max_iterations=50,
|
| 135 |
+
handle_parsing_errors=True
|
|
|
|
| 136 |
)
|
| 137 |
result = agent.invoke({"input": enhanced_prompt})
|
| 138 |
return _sanitize_text(result.get("output", "No output generated."))
|
|
|
|
| 152 |
# ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
|
| 153 |
with gr.Blocks(theme="soft", css="style.css") as demo:
|
| 154 |
assessment_history = gr.State([])
|
| 155 |
+
# ... (The rest of the UI code is identical to the last working version) ...
|
| 156 |
with gr.Group(visible=False) as privacy_modal:
|
| 157 |
with gr.Blocks():
|
| 158 |
gr.Markdown(PRIVACY_POLICY_TEXT)
|
|
|
|
| 194 |
return
|
| 195 |
|
| 196 |
chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
|
| 197 |
+
thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Formulating execution plan... Please wait.\n```")
|
| 198 |
yield thinking_message, history_state_list, gr.update()
|
| 199 |
ai_response_text = handle(prompt, files)
|
| 200 |
final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
|