mic3333 Claude committed on
Commit
b19a39d
·
1 Parent(s): 9b83da2

Integrate OpenRouter AI API for enhanced data analysis

Browse files

- Add OpenRouter ChatOpenAI integration with Microsoft Phi-4 model
- Replace keyword-based responses with intelligent LLM-powered analysis
- Add proper prompt templating for data analysis questions
- Implement fallback error handling for API unavailability
- Configure environment variable support for OPENROUTER_API_KEY

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +92 -61
app.py CHANGED
@@ -6,12 +6,48 @@ import plotly.express as px
6
  import plotly.graph_objects as go
7
  from dash import Dash, html, dcc, Input, Output, State, callback_context
8
  import dash_bootstrap_components as dbc
 
 
 
9
 
10
  # Fixed Langchain imports (using langchain-community)
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
12
  from langchain_community.vectorstores import FAISS
13
  from langchain.text_splitter import RecursiveCharacterTextSplitter
14
  from langchain.schema import Document
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Initialize Dash app
17
  app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
@@ -177,80 +213,75 @@ def create_vector_store(df):
177
  return False
178
 
179
def get_ai_response(question, df):
    """Get AI response using RAG"""
    # NOTE(review): despite the docstring, this version does no retrieval —
    # it only checks that vector_store exists, then pattern-matches keywords
    # in the question and returns canned pandas summaries.
    global vector_store

    # Data must have been uploaded (vector_store set elsewhere) first.
    if vector_store is None:
        return "Please upload data first to enable AI features."

    try:
        # Simple keyword-based responses for demo
        question_lower = question.lower()

        # Branch 1: dataset overview (shape, columns, missing count).
        if "summary" in question_lower or "overview" in question_lower:
            return f"""📊 **Data Summary**:
- **Shape**: {df.shape[0]} rows × {df.shape[1]} columns
- **Columns**: {', '.join(df.columns)}
- **Missing values**: {df.isnull().sum().sum()} total
- **Numeric columns**: {len(df.select_dtypes(include=['number']).columns)}
"""

        # Branch 2: report the single strongest pairwise correlation
        # among numeric columns.
        elif "correlation" in question_lower or "relationship" in question_lower:
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 1:
                corr = df[numeric_cols].corr()
                # Find highest correlation
                corr_vals = corr.abs().unstack().sort_values(ascending=False)
                corr_vals = corr_vals[corr_vals < 1.0]  # Remove self-correlations
                if not corr_vals.empty:
                    top_corr = corr_vals.iloc[0]
                    col1, col2 = corr_vals.index[0]
                    return f"""🔗 **Correlation Analysis**:
- Strongest relationship: **{col1}** and **{col2}** (r = {top_corr:.3f})
- This suggests a {'strong' if top_corr > 0.7 else 'moderate' if top_corr > 0.5 else 'weak'} correlation
"""
            # NOTE(review): also reached when >1 numeric column exists but
            # corr_vals is empty, so the message is slightly imprecise.
            return "No numeric columns found for correlation analysis."

        # Branch 3: list columns that contain missing values.
        elif "missing" in question_lower or "null" in question_lower:
            missing = df.isnull().sum()
            missing = missing[missing > 0]
            if missing.empty:
                return "✅ **Great news!** No missing values found in your dataset."
            else:
                return f"""⚠️ **Missing Data Found**:
{missing.to_string()}

**Recommendation**: Consider filling or removing missing values before analysis.
"""

        # Branch 4: visualization suggestions based on column dtypes.
        elif "recommend" in question_lower or "suggest" in question_lower:
            suggestions = []
            numeric_cols = df.select_dtypes(include=['number']).columns
            categorical_cols = df.select_dtypes(include=['object']).columns

            if len(numeric_cols) >= 2:
                suggestions.append("📈 Try scatter plots to explore relationships between numeric variables")
            if len(categorical_cols) > 0 and len(numeric_cols) > 0:
                suggestions.append("📊 Create bar charts to compare numeric values across categories")
            if len(numeric_cols) > 0:
                suggestions.append("📉 Use histograms to understand data distributions")

            # chr(10) is '\n' — joins bullets because backslashes are not
            # allowed inside f-string expressions on older Pythons.
            return f"""💡 **Analysis Suggestions**:
{chr(10).join(['• ' + s for s in suggestions])}
"""

        # Fallback: explain the supported question types.
        else:
            return f"""🤖 **AI Assistant**: I can help you with:
- Data summaries and overviews
- Correlation and relationship analysis
- Missing data detection
- Visualization recommendations

Try asking: "What's the summary?" or "Any missing data?"
"""

    except Exception as e:
        # Broad catch keeps the chat UI responsive on any pandas error.
        return f"Error processing question: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
254
 
255
  def parse_contents(contents, filename):
256
  """Parse uploaded file contents"""
 
6
  import plotly.graph_objects as go
7
  from dash import Dash, html, dcc, Input, Output, State, callback_context
8
  import dash_bootstrap_components as dbc
9
+ from typing import Optional
10
+ from dotenv import load_dotenv
11
+ from pydantic import Field, SecretStr
12
 
13
  # Fixed Langchain imports (using langchain-community)
14
  from langchain_community.embeddings import HuggingFaceEmbeddings
15
  from langchain_community.vectorstores import FAISS
16
  from langchain.text_splitter import RecursiveCharacterTextSplitter
17
  from langchain.schema import Document
18
+ from langchain_openai import ChatOpenAI
19
+ from langchain_core.prompts import PromptTemplate
20
+ from langchain_core.utils.utils import secret_from_env
21
+ from langchain.chains import LLMChain
22
+
23
+ # Load environment variables
24
+ load_dotenv()
25
+
26
class ChatOpenRouter(ChatOpenAI):
    """ChatOpenAI subclass that talks to the OpenRouter API.

    Points ``base_url`` at OpenRouter's OpenAI-compatible endpoint and
    sources the API key from the ``OPENROUTER_API_KEY`` environment
    variable instead of ``OPENAI_API_KEY``.
    """

    # FIX: secret_from_env(...) already RETURNS a zero-argument factory,
    # so it must be passed directly as default_factory. The previous
    # ``default_factory=lambda: secret_from_env(...)`` made the field's
    # default value the factory *function* itself, not the SecretStr.
    openai_api_key: Optional[SecretStr] = Field(
        alias="api_key",
        default_factory=secret_from_env("OPENROUTER_API_KEY", default=None),
    )

    @property
    def lc_secrets(self) -> dict[str, str]:
        # Tell LangChain serialization which env var backs the secret
        # so the key is masked when the model is dumped/logged.
        return {"openai_api_key": "OPENROUTER_API_KEY"}

    def __init__(self, openai_api_key: Optional[str] = None, **kwargs):
        """Initialize with OpenRouter's base URL.

        Args:
            openai_api_key: Explicit API key; falls back to the
                OPENROUTER_API_KEY environment variable when omitted.
            **kwargs: Forwarded to ChatOpenAI (model_name, temperature, ...).
        """
        openai_api_key = openai_api_key or os.environ.get("OPENROUTER_API_KEY")
        super().__init__(
            base_url="https://openrouter.ai/api/v1",
            openai_api_key=openai_api_key,
            **kwargs,
        )
40
+
41
# Initialize OpenRouter model used by get_ai_response().
# NOTE(review): constructed at import time — if OPENROUTER_API_KEY is not
# set, model construction (or the first request) may fail at startup;
# confirm the deployment environment always provides the key.
openrouter_model = ChatOpenRouter(
    model_name="microsoft/phi-4-reasoning-plus",  # OpenRouter model slug
    temperature=0.3,        # low randomness for more factual analysis
    max_tokens=1500,        # cap on generated tokens per answer
    top_p=0.9,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    streaming=False         # whole answer returned in a single response
)
51
 
52
  # Initialize Dash app
53
  app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
 
213
  return False
214
 
215
def get_ai_response(question, df):
    """Get AI response using OpenRouter LLM and RAG.

    Builds a textual context from the uploaded DataFrame (shape, dtypes,
    missing values, head, describe), sends it with the user's question to
    the OpenRouter-backed chat model, and returns the answer text. On any
    API failure it falls back to a static local summary.

    Args:
        question: Free-form user question about the uploaded data.
        df: pandas DataFrame holding the uploaded dataset.

    Returns:
        str: Markdown-formatted answer, or a fallback summary on error.
    """
    global vector_store

    # vector_store is set when data is uploaded; no data, nothing to analyze.
    if vector_store is None:
        return "Please upload data first to enable AI features."

    try:
        # Create data context for the LLM
        data_context = f"""
Dataset Information:
- Shape: {df.shape[0]} rows × {df.shape[1]} columns
- Columns: {', '.join(df.columns)}
- Data Types: {df.dtypes.to_dict()}
- Missing Values: {df.isnull().sum().to_dict()}

Sample Data (first 5 rows):
{df.head().to_string()}

Summary Statistics:
{df.describe().to_string()}
"""

        # Create a prompt template for data analysis
        prompt_template = PromptTemplate(
            input_variables=["question", "data_context"],
            template="""
You are a professional data analyst AI assistant. Based on the provided dataset information, answer the user's question with clear, actionable insights.

Dataset Context:
{data_context}

User Question: {question}

Please provide a helpful, accurate response with:
1. Direct answer to the question
2. Key insights or patterns you notice
3. Recommendations or next steps if applicable

Use emojis and markdown formatting to make your response engaging and easy to read.
"""
        )

        # FIX: LLMChain and Chain.run are deprecated in langchain 0.1+;
        # compose the prompt and model with the LCEL runnable pipeline
        # (langchain_core is already imported at the top of the file).
        chain = prompt_template | openrouter_model

        # Get response from OpenRouter. invoke() on a prompt|chat-model
        # chain returns an AIMessage; .content is the generated text,
        # matching the string return type of the old LLMChain.run call.
        response = chain.invoke(
            {"question": question, "data_context": data_context}
        )
        return response.content

    except Exception as e:
        # Fallback to basic responses if OpenRouter fails
        print(f"OpenRouter error: {e}")
        return f"""🤖 **AI Assistant** (Limited Mode):
I encountered an issue with the AI service. Here's basic info about your data:

📊 **Quick Summary**:
- Shape: {df.shape[0]} rows × {df.shape[1]} columns
- Columns: {', '.join(df.columns)}
- Missing values: {df.isnull().sum().sum()} total

Please check your OPENROUTER_API_KEY configuration.
"""
285
 
286
  def parse_contents(contents, filename):
287
  """Parse uploaded file contents"""