AbhayVG committed on
Commit
87e31d5
·
verified ·
1 Parent(s): 219bd2a

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +772 -773
  2. ncap_funding_data.pkl +3 -0
  3. src.py +571 -550
  4. states_data.pkl +3 -0
app.py CHANGED
@@ -1,774 +1,773 @@
1
- import streamlit as st
2
- import os
3
- import json
4
- import pandas as pd
5
- import random
6
- from os.path import join
7
- from datetime import datetime
8
- from src import (
9
- preprocess_and_load_df,
10
- load_agent,
11
- ask_agent,
12
- decorate_with_code,
13
- show_response,
14
- get_from_user,
15
- load_smart_df,
16
- ask_question,
17
- )
18
- from dotenv import load_dotenv
19
- from langchain_groq import ChatGroq
20
- from langchain_google_genai import ChatGoogleGenerativeAI
21
- from streamlit_feedback import streamlit_feedback
22
- from huggingface_hub import HfApi
23
- from datasets import load_dataset, get_dataset_config_info, Dataset
24
- from PIL import Image
25
- import time
26
- import uuid
27
-
28
- # Page config with beautiful theme
29
- st.set_page_config(
30
- page_title="VayuChat - AI Air Quality Assistant",
31
- page_icon="🌬️",
32
- layout="wide",
33
- initial_sidebar_state="expanded"
34
- )
35
-
36
- # Custom CSS for beautiful styling
37
- st.markdown("""
38
- <style>
39
- /* Clean app background */
40
- .stApp {
41
- background-color: #ffffff;
42
- color: #212529;
43
- font-family: 'Segoe UI', sans-serif;
44
- }
45
-
46
- /* Sidebar */
47
- [data-testid="stSidebar"] {
48
- background-color: #f8f9fa;
49
- border-right: 1px solid #dee2e6;
50
- padding: 1rem;
51
- }
52
-
53
- /* Main title */
54
- .main-title {
55
- text-align: center;
56
- color: #343a40;
57
- font-size: 2.5rem;
58
- font-weight: 700;
59
- margin-bottom: 0.5rem;
60
- }
61
-
62
- /* Subtitle */
63
- .subtitle {
64
- text-align: center;
65
- color: #6c757d;
66
- font-size: 1.1rem;
67
- margin-bottom: 1.5rem;
68
- }
69
-
70
- /* Instructions */
71
- .instructions {
72
- background-color: #f1f3f5;
73
- border-left: 4px solid #0d6efd;
74
- padding: 1rem;
75
- margin-bottom: 1.5rem;
76
- border-radius: 6px;
77
- color: #495057;
78
- text-align: left;
79
- }
80
-
81
- /* Quick prompt buttons */
82
- .quick-prompt-container {
83
- display: flex;
84
- flex-wrap: wrap;
85
- gap: 8px;
86
- margin-bottom: 1.5rem;
87
- padding: 1rem;
88
- background-color: #f8f9fa;
89
- border-radius: 10px;
90
- border: 1px solid #dee2e6;
91
- }
92
-
93
- .quick-prompt-btn {
94
- background-color: #0d6efd;
95
- color: white;
96
- border: none;
97
- padding: 8px 16px;
98
- border-radius: 20px;
99
- font-size: 0.9rem;
100
- cursor: pointer;
101
- transition: all 0.2s ease;
102
- white-space: nowrap;
103
- }
104
-
105
- .quick-prompt-btn:hover {
106
- background-color: #0b5ed7;
107
- transform: translateY(-2px);
108
- }
109
-
110
- /* User message styling */
111
- .user-message {
112
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
113
- color: white;
114
- padding: 15px 20px;
115
- border-radius: 20px 20px 5px 20px;
116
- margin: 10px 0;
117
- margin-left: auto;
118
- margin-right: 0;
119
- max-width: 80%;
120
- position: relative;
121
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
122
- }
123
-
124
- .user-info {
125
- font-size: 0.8rem;
126
- opacity: 0.8;
127
- margin-bottom: 5px;
128
- text-align: right;
129
- }
130
-
131
- /* Assistant message styling */
132
- .assistant-message {
133
- background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
134
- color: white;
135
- padding: 15px 20px;
136
- border-radius: 20px 20px 20px 5px;
137
- margin: 10px 0;
138
- margin-left: 0;
139
- margin-right: auto;
140
- max-width: 80%;
141
- position: relative;
142
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
143
- }
144
-
145
- .assistant-info {
146
- font-size: 0.8rem;
147
- opacity: 0.8;
148
- margin-bottom: 5px;
149
- }
150
-
151
- /* Processing indicator */
152
- .processing-indicator {
153
- background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
154
- color: #333;
155
- padding: 15px 20px;
156
- border-radius: 20px 20px 20px 5px;
157
- margin: 10px 0;
158
- margin-left: 0;
159
- margin-right: auto;
160
- max-width: 80%;
161
- position: relative;
162
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
163
- animation: pulse 2s infinite;
164
- }
165
-
166
- @keyframes pulse {
167
- 0% { opacity: 1; }
168
- 50% { opacity: 0.7; }
169
- 100% { opacity: 1; }
170
- }
171
-
172
- /* Feedback box */
173
- .feedback-section {
174
- background-color: #f8f9fa;
175
- border: 1px solid #dee2e6;
176
- padding: 1rem;
177
- border-radius: 8px;
178
- margin: 1rem 0;
179
- }
180
-
181
- /* Success and error messages */
182
- .success-message {
183
- background-color: #d1e7dd;
184
- color: #0f5132;
185
- padding: 1rem;
186
- border-radius: 6px;
187
- border: 1px solid #badbcc;
188
- }
189
-
190
- .error-message {
191
- background-color: #f8d7da;
192
- color: #842029;
193
- padding: 1rem;
194
- border-radius: 6px;
195
- border: 1px solid #f5c2c7;
196
- }
197
-
198
- /* Chat input */
199
- .stChatInput {
200
- border-radius: 6px;
201
- border: 1px solid #ced4da;
202
- background: #ffffff;
203
- }
204
-
205
- /* Button */
206
- .stButton > button {
207
- background-color: #0d6efd;
208
- color: white;
209
- border-radius: 6px;
210
- padding: 0.5rem 1.25rem;
211
- border: none;
212
- font-weight: 600;
213
- transition: background-color 0.2s ease;
214
- }
215
-
216
- .stButton > button:hover {
217
- background-color: #0b5ed7;
218
- }
219
-
220
- /* Code details styling */
221
- .code-details {
222
- background-color: #f8f9fa;
223
- border: 1px solid #dee2e6;
224
- border-radius: 8px;
225
- padding: 10px;
226
- margin-top: 10px;
227
- }
228
-
229
- /* Hide default menu and footer */
230
- #MainMenu {visibility: hidden;}
231
- footer {visibility: hidden;}
232
- header {visibility: hidden;}
233
-
234
- /* Auto scroll */
235
- .main-container {
236
- height: 70vh;
237
- overflow-y: auto;
238
- }
239
- </style>
240
- """, unsafe_allow_html=True)
241
-
242
- # Auto-scroll JavaScript
243
- st.markdown("""
244
- <script>
245
- function scrollToBottom() {
246
- setTimeout(function() {
247
- const mainContainer = document.querySelector('.main-container');
248
- if (mainContainer) {
249
- mainContainer.scrollTop = mainContainer.scrollHeight;
250
- }
251
- window.scrollTo(0, document.body.scrollHeight);
252
- }, 100);
253
- }
254
- </script>
255
- """, unsafe_allow_html=True)
256
-
257
- # FORCE reload environment variables
258
- load_dotenv(override=True)
259
-
260
- # Get API keys
261
- Groq_Token = os.getenv("GROQ_API_KEY")
262
- hf_token = os.getenv("HF_TOKEN")
263
- gemini_token = os.getenv("GEMINI_TOKEN")
264
-
265
- models = {
266
- "gpt-oss-20b": "openai/gpt-oss-20b",
267
- "gpt-oss-120b": "openai/gpt-oss-120b",
268
- "llama3.1": "llama-3.1-8b-instant",
269
- "llama3.3": "llama-3.3-70b-versatile",
270
- "deepseek-R1": "deepseek-r1-distill-llama-70b",
271
- "llama4 maverik":"meta-llama/llama-4-maverick-17b-128e-instruct",
272
- "llama4 scout":"meta-llama/llama-4-scout-17b-16e-instruct",
273
- "gemini-pro": "gemini-1.5-pro"
274
- }
275
-
276
- self_path = os.path.dirname(os.path.abspath(__file__))
277
-
278
- # Initialize session ID for this session
279
- if "session_id" not in st.session_state:
280
- st.session_state.session_id = str(uuid.uuid4())
281
-
282
def upload_feedback(feedback, error, output, last_prompt, code, status):
    """Write a feedback report to a temp file and upload it to the HF dataset.

    Args:
        feedback: Mapping with optional ``"score"`` and ``"text"`` entries.
        error: Error message recorded for the response, or a falsy value.
        output: The assistant output; when it is a path to an existing file
            and ``status["is_image"]`` is true, the file is uploaded as well.
        last_prompt: The user prompt that produced ``output``.
        code: Generated code shown to the user, or a falsy value.
        status: Mapping that may carry an ``"is_image"`` flag.

    Returns:
        True when the markdown report was uploaded, False when the token is
        missing or any step raised.
    """
    # Local import keeps this block independent of the file-level import list.
    import tempfile

    markdown_local_path = None
    try:
        if not hf_token or hf_token.strip() == "":
            st.warning("⚠️ Cannot upload feedback - HF_TOKEN not available")
            return False

        # Take the clock once so the report timestamp and file name agree.
        now = datetime.now()

        # Create comprehensive feedback data
        feedback_data = {
            "timestamp": now.isoformat(),
            "session_id": st.session_state.session_id,
            "feedback_score": feedback.get("score", ""),
            "feedback_comment": feedback.get("text", ""),
            "user_prompt": last_prompt,
            "ai_output": str(output),
            "generated_code": code or "",
            "error_message": error or "",
            "is_image_output": status.get("is_image", False),
            "success": not bool(error),
        }

        # Unique name: timestamp plus a short random suffix to avoid clashes
        # between reports created within the same second.
        timestamp_str = now.strftime("%Y%m%d_%H%M%S")
        random_id = str(uuid.uuid4())[:8]
        folder_name = f"feedback_{timestamp_str}_{random_id}"

        # Create markdown feedback file
        markdown_content = f"""# VayuChat Feedback Report

## Session Information
- **Timestamp**: {feedback_data['timestamp']}
- **Session ID**: {feedback_data['session_id']}

## User Interaction
**Prompt**: {feedback_data['user_prompt']}

## AI Response
**Output**: {feedback_data['ai_output']}

## Generated Code
```python
{feedback_data['generated_code']}
```

## Technical Details
- **Error Message**: {feedback_data['error_message']}
- **Is Image Output**: {feedback_data['is_image_output']}
- **Success**: {feedback_data['success']}

## User Feedback
- **Score**: {feedback_data['feedback_score']}
- **Comments**: {feedback_data['feedback_comment']}
"""

        # Save the report in the platform temp directory rather than a
        # hard-coded "/tmp", which does not exist on every host.
        markdown_filename = f"{folder_name}.md"
        markdown_local_path = os.path.join(tempfile.gettempdir(), markdown_filename)

        with open(markdown_local_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        # Upload to Hugging Face
        api = HfApi(token=hf_token)

        # Upload markdown feedback
        api.upload_file(
            path_or_fileobj=markdown_local_path,
            path_in_repo=f"data/{markdown_filename}",
            repo_id="SustainabilityLabIITGN/VayuChat_Feedback",
            repo_type="dataset",
        )

        # Upload image if it exists and is an image output
        if status.get("is_image", False) and isinstance(output, str) and os.path.exists(output):
            try:
                image_filename = f"{folder_name}_plot.png"
                api.upload_file(
                    path_or_fileobj=output,
                    path_in_repo=f"data/{image_filename}",
                    repo_id="SustainabilityLabIITGN/VayuChat_Feedback",
                    repo_type="dataset",
                )
            except Exception as img_error:
                # Best effort: a failed plot upload must not fail the report.
                print(f"Error uploading image: {img_error}")

        st.success("🎉 Feedback uploaded successfully!")
        return True

    except Exception as e:
        st.error(f"❌ Error uploading feedback: {e}")
        print(f"Feedback upload error: {e}")
        return False

    finally:
        # Clean up the temp file on every path, including failed uploads,
        # so repeated failures do not accumulate files on disk.
        if markdown_local_path and os.path.exists(markdown_local_path):
            try:
                os.remove(markdown_local_path)
            except OSError as cleanup_error:
                print(f"Could not remove temp feedback file: {cleanup_error}")
378
-
379
- # Beautiful header
380
- st.markdown("<h1 class='main-title'>🌬️ VayuChat</h1>", unsafe_allow_html=True)
381
-
382
- st.markdown("""
383
- <div class='subtitle'>
384
- <strong>AI-Powered Air Quality Insights</strong><br>
385
- Simplifying pollution analysis using conversational AI.
386
- </div>
387
- """, unsafe_allow_html=True)
388
-
389
- st.markdown("""
390
- <div class='instructions'>
391
- <strong>How to Use:</strong><br>
392
- Select a model from the sidebar and ask questions directly in the chat. Use quick prompts below for common queries.
393
- </div>
394
- """, unsafe_allow_html=True)
395
-
396
- os.environ["PANDASAI_API_KEY"] = "$2a$10$gbmqKotzJOnqa7iYOun8eO50TxMD/6Zw1pLI2JEoqncwsNx4XeBS2"
397
-
398
- # Load data with error handling
399
- try:
400
- df = preprocess_and_load_df(join(self_path, "Data.csv"))
401
- st.success("✅ Data loaded successfully!")
402
- except Exception as e:
403
- st.error(f"❌ Error loading data: {e}")
404
- st.stop()
405
-
406
- inference_server = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
407
- image_path = "IITGN_Logo.png"
408
-
409
- # Beautiful sidebar
410
- with st.sidebar:
411
- # Logo and title
412
- col1, col2, col3 = st.columns([1, 2, 1])
413
- with col2:
414
- if os.path.exists(image_path):
415
- st.image(image_path, use_column_width=True)
416
-
417
- # Session info
418
- st.markdown(f"**Session ID**: `{st.session_state.session_id[:8]}...`")
419
-
420
- # Model selection
421
- st.markdown("### 🤖 AI Model Selection")
422
-
423
- # Filter available models
424
- available_models = []
425
- model_names = list(models.keys())
426
- groq_models = []
427
- gemini_models = []
428
- for model_name in model_names:
429
- if "gemini" not in model_name:
430
- groq_models.append(model_name)
431
- else:
432
- gemini_models.append(model_name)
433
- if Groq_Token and Groq_Token.strip():
434
- available_models.extend(groq_models)
435
- if gemini_token and gemini_token.strip():
436
- available_models.extend(gemini_models)
437
-
438
- if not available_models:
439
- st.error("❌ No API keys available! Please set up your API keys in the .env file")
440
- st.stop()
441
-
442
- model_name = st.selectbox(
443
- "Choose your AI assistant:",
444
- available_models,
445
- help="Different models have different strengths. Try them all!"
446
- )
447
-
448
- # Model descriptions
449
- model_descriptions = {
450
- "llama3.1": "🦙 Fast and efficient for general queries",
451
- "llama3.3": "🦙 Most advanced LLaMA model for complex reasoning",
452
- "mistral": "⚡ Balanced performance and speed",
453
- "gemma": "💎 Google's lightweight model",
454
- "gemini-pro": "🧠 Google's most powerful model",
455
- "gpt-oss-20b": "📘 OpenAI's compact open-weight GPT for everyday tasks",
456
- "gpt-oss-120b": "📚 OpenAI's massive open-weight GPT for nuanced responses",
457
- "deepseek-R1": "🔍 DeepSeek's distilled LLaMA model for efficient reasoning",
458
- "llama4 maverik": "🚀 Meta's LLaMA 4 Maverick — high-performance instruction model",
459
- "llama4 scout": "🛰️ Meta's LLaMA 4 Scout — optimized for adaptive reasoning"
460
- }
461
-
462
-
463
- if model_name in model_descriptions:
464
- st.info(model_descriptions[model_name])
465
-
466
- st.markdown("---")
467
-
468
- # Logging status
469
- st.markdown("### 📊 Logging Status")
470
- if hf_token and hf_token.strip():
471
- st.success("✅ Logging enabled")
472
- st.caption("Interactions are being logged to HuggingFace")
473
- else:
474
- st.warning("⚠️ Logging disabled")
475
- st.caption("HF_TOKEN not available")
476
-
477
- st.markdown("---")
478
-
479
- # Clear Chat Button
480
- if st.button("🧹 Clear Chat"):
481
- st.session_state.responses = []
482
- st.session_state.processing = False
483
- # Generate new session ID for new chat
484
- st.session_state.session_id = str(uuid.uuid4())
485
- try:
486
- st.rerun()
487
- except AttributeError:
488
- st.experimental_rerun()
489
-
490
- st.markdown("---")
491
-
492
- # Chat History in Sidebar
493
- with st.expander("📜 Chat History"):
494
- for i, response in enumerate(st.session_state.get("responses", [])):
495
- if response.get("role") == "user":
496
- st.markdown(f"**You:** {response.get('content', '')[:50]}...")
497
- elif response.get("role") == "assistant":
498
- content = response.get('content', '')
499
- if isinstance(content, str) and len(content) > 50:
500
- st.markdown(f"**VayuChat:** {content[:50]}...")
501
- else:
502
- st.markdown(f"**VayuChat:** {str(content)[:50]}...")
503
- st.markdown("---")
504
-
505
- # Load quick prompts
506
- questions = []
507
- questions_file = join(self_path, "questions.txt")
508
- if os.path.exists(questions_file):
509
- try:
510
- with open(questions_file, 'r', encoding='utf-8') as f:
511
- content = f.read()
512
- questions = [q.strip() for q in content.split("\n") if q.strip()]
513
- print(f"Loaded {len(questions)} quick prompts") # Debug
514
- except Exception as e:
515
- st.error(f"Error loading questions: {e}")
516
- questions = []
517
-
518
- # Add some default prompts if file doesn't exist or is empty
519
- if not questions:
520
- questions = [
521
- "What is the average PM2.5 level in the dataset?",
522
- "Show me the air quality trend over time",
523
- "Which pollutant has the highest concentration?",
524
- "Create a correlation plot between different pollutants",
525
- "What are the peak pollution hours?",
526
- "Compare weekday vs weekend pollution levels"
527
- ]
528
-
529
- # Quick prompts section (horizontal)
530
- st.markdown("### 💭 Quick Prompts")
531
-
532
- # Create columns for horizontal layout
533
- cols_per_row = 2 # Reduced to 2 for better fit
534
- rows = [questions[i:i + cols_per_row] for i in range(0, len(questions), cols_per_row)]
535
-
536
- selected_prompt = None
537
- for row_idx, row in enumerate(rows):
538
- cols = st.columns(len(row))
539
- for col_idx, question in enumerate(row):
540
- with cols[col_idx]:
541
- # Create unique key using row and column indices
542
- unique_key = f"prompt_btn_{row_idx}_{col_idx}"
543
- button_text = f"📝 {question[:35]}{'...' if len(question) > 35 else ''}"
544
-
545
- if st.button(button_text,
546
- key=unique_key,
547
- help=question,
548
- use_container_width=True):
549
- selected_prompt = question
550
-
551
- st.markdown("---")
552
-
553
- # Initialize chat history and processing state
554
- if "responses" not in st.session_state:
555
- st.session_state.responses = []
556
- if "processing" not in st.session_state:
557
- st.session_state.processing = False
558
-
559
def show_custom_response(response):
    """Render one chat message and report whether an image was displayed.

    Args:
        response: Mapping with optional ``"role"`` (defaults to
            ``"assistant"``), ``"content"`` and ``"gen_code"`` entries.

    Returns:
        ``{"is_image": True}`` when ``content`` is a path to an existing
        ``.png``/``.jpg`` file that was shown with ``st.image``; otherwise
        ``{"is_image": False}``.
    """
    # Local import keeps this block independent of the file-level import list.
    import html

    role = response.get("role", "assistant")
    content = response.get("content", "")

    # The text is interpolated into raw HTML (unsafe_allow_html=True), so it
    # must be escaped; otherwise markup in a prompt or model reply would be
    # injected into the page. Quotes are left alone: this is element text,
    # not an attribute value.
    raw_text = content if isinstance(content, str) else str(content)
    safe_text = html.escape(raw_text, quote=False)

    if role == "user":
        st.markdown(f"""
        <div class='user-message'>
        <div class='user-info'>You</div>
        {safe_text}
        </div>
        """, unsafe_allow_html=True)
    elif role == "assistant":
        st.markdown(f"""
        <div class='assistant-message'>
        <div class='assistant-info'>🤖 VayuChat</div>
        {safe_text}
        </div>
        """, unsafe_allow_html=True)

        # Show generated code if available
        if response.get("gen_code"):
            with st.expander("📋 View Generated Code"):
                st.code(response["gen_code"], language="python")

    # Try to display an image when the content is a path to an image file.
    if (
        isinstance(content, str)
        and content.endswith(('.png', '.jpg'))
        and os.path.exists(content)
    ):
        try:
            st.image(content)
        except Exception as img_error:
            # Still best effort, but no longer silent: a broken image should
            # not take the chat down, yet the failure is worth a log line.
            print(f"Error displaying image: {img_error}")
        else:
            return {"is_image": True}

    return {"is_image": False}
594
-
595
def show_processing_indicator(model_name, question):
    """Render the "generating response" bubble for a pending question.

    Args:
        model_name: Display name of the model handling the question.
        question: The user's question text.
    """
    # Local import keeps this block independent of the file-level import list.
    import html

    # Both values end up inside raw HTML (unsafe_allow_html=True); escape
    # them so user-typed markup is shown as text instead of being injected.
    safe_model = html.escape(str(model_name), quote=False)
    safe_question = html.escape(str(question), quote=False)

    st.markdown(f"""
    <div class='processing-indicator'>
    <div class='assistant-info'>🤖 VayuChat • Processing with {safe_model}</div>
    <strong>Question:</strong> {safe_question}<br>
    <em>🔄 Generating response...</em>
    </div>
    """, unsafe_allow_html=True)
604
-
605
- # Main chat container
606
- chat_container = st.container()
607
-
608
# Render the conversation, the per-response feedback controls, and the
# processing bubble inside the main chat container.
with chat_container:
    # Display chat history
    for response_id, response in enumerate(st.session_state.responses):
        status = show_custom_response(response)

        # Show feedback section for assistant responses
        if response["role"] == "assistant":
            feedback_key = f"feedback_{int(response_id/2)}"
            # st.button is True only during the rerun triggered by its own
            # click. Gating the comment box and Submit button on it directly
            # made them vanish on the next interaction, so feedback could
            # never be submitted. The choice is kept in session state
            # instead; the session id in the key drops stale choices after
            # "Clear Chat" starts a new session.
            choice_key = f"{feedback_key}_choice_{st.session_state.session_id}"
            error = response.get("error", "")
            output = response.get("content", "")
            last_prompt = response.get("last_prompt", "")
            code = response.get("gen_code", "")

            if "feedback" in st.session_state.responses[response_id]:
                feedback_data = st.session_state.responses[response_id]["feedback"]
                feedback_text = feedback_data.get('text')
                feedback_suffix = f"- {feedback_data.get('text', '')}" if feedback_text else ""
                st.markdown(f"""
                <div class='feedback-section'>
                <strong>📝 Your Feedback:</strong> {feedback_data.get('score', '')}
                {feedback_suffix}
                </div>
                """, unsafe_allow_html=True)
            else:
                # Beautiful feedback section
                st.markdown("---")
                st.markdown("**How was this response?**")

                col1, col2 = st.columns(2)
                with col1:
                    thumbs_up = st.button("👍 Helpful", key=f"{feedback_key}_up", use_container_width=True)
                with col2:
                    thumbs_down = st.button("👎 Not Helpful", key=f"{feedback_key}_down", use_container_width=True)

                # Remember the latest click so it survives later reruns.
                if thumbs_up:
                    st.session_state[choice_key] = "👍 Helpful"
                elif thumbs_down:
                    st.session_state[choice_key] = "👎 Not Helpful"

                thumbs = st.session_state.get(choice_key)
                if thumbs:
                    comments = st.text_area(
                        "💬 Tell us more (optional):",
                        key=f"{feedback_key}_comments",
                        placeholder="What could be improved? Any suggestions?",
                        max_chars=500
                    )

                    if st.button("🚀 Submit Feedback", key=f"{feedback_key}_submit"):
                        feedback = {"score": thumbs, "text": comments}

                        # Upload feedback with enhanced error handling
                        if upload_feedback(feedback, error, output, last_prompt, code, status or {}):
                            st.session_state.responses[response_id]["feedback"] = feedback
                            # The stored choice is no longer needed once submitted.
                            st.session_state.pop(choice_key, None)
                            time.sleep(1)  # Give user time to see success message
                            st.rerun()
                        else:
                            st.error("Failed to submit feedback. Please try again.")

    # Show processing indicator if processing
    if st.session_state.get("processing"):
        show_processing_indicator(
            st.session_state.get("current_model", "Unknown"),
            st.session_state.get("current_question", "Processing...")
        )
666
-
667
- # Chat input (always visible at bottom)
668
- prompt = st.chat_input("💬 Ask me anything about air quality!", key="main_chat")
669
-
670
- # Handle selected prompt from quick prompts
671
- if selected_prompt:
672
- prompt = selected_prompt
673
-
674
- # Handle new queries
675
- if prompt and not st.session_state.get("processing"):
676
- # Prevent duplicate processing
677
- if "last_prompt" in st.session_state:
678
- last_prompt = st.session_state["last_prompt"]
679
- last_model_name = st.session_state.get("last_model_name", "")
680
- if (prompt == last_prompt) and (model_name == last_model_name):
681
- prompt = None
682
-
683
- if prompt:
684
- # Add user input to chat history
685
- user_response = get_from_user(prompt)
686
- st.session_state.responses.append(user_response)
687
-
688
- # Set processing state
689
- st.session_state.processing = True
690
- st.session_state.current_model = model_name
691
- st.session_state.current_question = prompt
692
-
693
- # Rerun to show processing indicator
694
- st.rerun()
695
-
696
- # Process the question if we're in processing state
697
- if st.session_state.get("processing"):
698
- prompt = st.session_state.get("current_question")
699
- model_name = st.session_state.get("current_model")
700
-
701
- try:
702
- response = ask_question(model_name=model_name, question=prompt)
703
-
704
- if not isinstance(response, dict):
705
- response = {
706
- "role": "assistant",
707
- "content": "❌ Error: Invalid response format",
708
- "gen_code": "",
709
- "ex_code": "",
710
- "last_prompt": prompt,
711
- "error": "Invalid response format"
712
- }
713
-
714
- response.setdefault("role", "assistant")
715
- response.setdefault("content", "No content generated")
716
- response.setdefault("gen_code", "")
717
- response.setdefault("ex_code", "")
718
- response.setdefault("last_prompt", prompt)
719
- response.setdefault("error", None)
720
-
721
- except Exception as e:
722
- response = {
723
- "role": "assistant",
724
- "content": f"Sorry, I encountered an error: {str(e)}",
725
- "gen_code": "",
726
- "ex_code": "",
727
- "last_prompt": prompt,
728
- "error": str(e)
729
- }
730
-
731
- st.session_state.responses.append(response)
732
- st.session_state["last_prompt"] = prompt
733
- st.session_state["last_model_name"] = model_name
734
- st.session_state.processing = False
735
-
736
- # Clear processing state
737
- if "current_model" in st.session_state:
738
- del st.session_state.current_model
739
- if "current_question" in st.session_state:
740
- del st.session_state.current_question
741
-
742
- st.rerun()
743
-
744
- # Auto-scroll to bottom
745
- if st.session_state.responses:
746
- st.markdown("<script>scrollToBottom();</script>", unsafe_allow_html=True)
747
-
748
- # Beautiful sidebar footer
749
- # with st.sidebar:
750
- # st.markdown("---")
751
- # st.markdown("""
752
- # <div class='contact-section'>
753
- # <h4>📄 Paper on VayuChat</h4>
754
- # <p>Learn more about VayuChat in our <a href='https://arxiv.org/abs/2411.12760' target='_blank'>Research Paper</a>.</p>
755
- # </div>
756
- # """, unsafe_allow_html=True)
757
-
758
- # Statistics (if logging is enabled)
759
- if hf_token and hf_token.strip():
760
- st.markdown("### 📈 Session Stats")
761
- total_interactions = len([r for r in st.session_state.get("responses", []) if r.get("role") == "assistant"])
762
- st.metric("Interactions", total_interactions)
763
-
764
- feedbacks_given = len([r for r in st.session_state.get("responses", []) if r.get("role") == "assistant" and "feedback" in r])
765
- st.metric("Feedbacks Given", feedbacks_given)
766
-
767
- # Footer
768
- st.markdown("""
769
- <div style='text-align: center; margin-top: 3rem; padding: 2rem; background: rgba(255,255,255,0.1); border-radius: 15px;'>
770
- <h3>🌍 Together for Cleaner Air</h3>
771
- <p>VayuChat - Empowering environmental awareness through AI</p>
772
- <small>© 2024 IIT Gandhinagar Sustainability Lab</small>
773
- </div>
774
  """, unsafe_allow_html=True)
 
1
+ import streamlit as st
2
+ import os
3
+ import json
4
+ import pandas as pd
5
+ import random
6
+ from os.path import join
7
+ from datetime import datetime
8
+ from src import (
9
+ preprocess_and_load_df,
10
+ load_agent,
11
+ ask_agent,
12
+ decorate_with_code,
13
+ show_response,
14
+ get_from_user,
15
+ load_smart_df,
16
+ ask_question,
17
+ )
18
+ from dotenv import load_dotenv
19
+ from langchain_groq import ChatGroq
20
+ from langchain_google_genai import ChatGoogleGenerativeAI
21
+ from streamlit_feedback import streamlit_feedback
22
+ from huggingface_hub import HfApi
23
+ from datasets import load_dataset, get_dataset_config_info, Dataset
24
+ from PIL import Image
25
+ import time
26
+ import uuid
27
+
28
+ # Page config with beautiful theme
29
+ st.set_page_config(
30
+ page_title="VayuChat - AI Air Quality Assistant",
31
+ page_icon="🌬️",
32
+ layout="wide",
33
+ initial_sidebar_state="expanded"
34
+ )
35
+
36
+ # Custom CSS for beautiful styling
37
+ st.markdown("""
38
+ <style>
39
+ /* Clean app background */
40
+ .stApp {
41
+ background-color: #ffffff;
42
+ color: #212529;
43
+ font-family: 'Segoe UI', sans-serif;
44
+ }
45
+
46
+ /* Sidebar */
47
+ [data-testid="stSidebar"] {
48
+ background-color: #f8f9fa;
49
+ border-right: 1px solid #dee2e6;
50
+ padding: 1rem;
51
+ }
52
+
53
+ /* Main title */
54
+ .main-title {
55
+ text-align: center;
56
+ color: #343a40;
57
+ font-size: 2.5rem;
58
+ font-weight: 700;
59
+ margin-bottom: 0.5rem;
60
+ }
61
+
62
+ /* Subtitle */
63
+ .subtitle {
64
+ text-align: center;
65
+ color: #6c757d;
66
+ font-size: 1.1rem;
67
+ margin-bottom: 1.5rem;
68
+ }
69
+
70
+ /* Instructions */
71
+ .instructions {
72
+ background-color: #f1f3f5;
73
+ border-left: 4px solid #0d6efd;
74
+ padding: 1rem;
75
+ margin-bottom: 1.5rem;
76
+ border-radius: 6px;
77
+ color: #495057;
78
+ text-align: left;
79
+ }
80
+
81
+ /* Quick prompt buttons */
82
+ .quick-prompt-container {
83
+ display: flex;
84
+ flex-wrap: wrap;
85
+ gap: 8px;
86
+ margin-bottom: 1.5rem;
87
+ padding: 1rem;
88
+ background-color: #f8f9fa;
89
+ border-radius: 10px;
90
+ border: 1px solid #dee2e6;
91
+ }
92
+
93
+ .quick-prompt-btn {
94
+ background-color: #0d6efd;
95
+ color: white;
96
+ border: none;
97
+ padding: 8px 16px;
98
+ border-radius: 20px;
99
+ font-size: 0.9rem;
100
+ cursor: pointer;
101
+ transition: all 0.2s ease;
102
+ white-space: nowrap;
103
+ }
104
+
105
+ .quick-prompt-btn:hover {
106
+ background-color: #0b5ed7;
107
+ transform: translateY(-2px);
108
+ }
109
+
110
+ /* User message styling */
111
+ .user-message {
112
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
113
+ color: white;
114
+ padding: 15px 20px;
115
+ border-radius: 20px 20px 5px 20px;
116
+ margin: 10px 0;
117
+ margin-left: auto;
118
+ margin-right: 0;
119
+ max-width: 80%;
120
+ position: relative;
121
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
122
+ }
123
+
124
+ .user-info {
125
+ font-size: 0.8rem;
126
+ opacity: 0.8;
127
+ margin-bottom: 5px;
128
+ text-align: right;
129
+ }
130
+
131
+ /* Assistant message styling */
132
+ .assistant-message {
133
+ background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
134
+ color: white;
135
+ padding: 15px 20px;
136
+ border-radius: 20px 20px 20px 5px;
137
+ margin: 10px 0;
138
+ margin-left: 0;
139
+ margin-right: auto;
140
+ max-width: 80%;
141
+ position: relative;
142
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
143
+ }
144
+
145
+ .assistant-info {
146
+ font-size: 0.8rem;
147
+ opacity: 0.8;
148
+ margin-bottom: 5px;
149
+ }
150
+
151
+ /* Processing indicator */
152
+ .processing-indicator {
153
+ background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
154
+ color: #333;
155
+ padding: 15px 20px;
156
+ border-radius: 20px 20px 20px 5px;
157
+ margin: 10px 0;
158
+ margin-left: 0;
159
+ margin-right: auto;
160
+ max-width: 80%;
161
+ position: relative;
162
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
163
+ animation: pulse 2s infinite;
164
+ }
165
+
166
+ @keyframes pulse {
167
+ 0% { opacity: 1; }
168
+ 50% { opacity: 0.7; }
169
+ 100% { opacity: 1; }
170
+ }
171
+
172
+ /* Feedback box */
173
+ .feedback-section {
174
+ background-color: #f8f9fa;
175
+ border: 1px solid #dee2e6;
176
+ padding: 1rem;
177
+ border-radius: 8px;
178
+ margin: 1rem 0;
179
+ }
180
+
181
+ /* Success and error messages */
182
+ .success-message {
183
+ background-color: #d1e7dd;
184
+ color: #0f5132;
185
+ padding: 1rem;
186
+ border-radius: 6px;
187
+ border: 1px solid #badbcc;
188
+ }
189
+
190
+ .error-message {
191
+ background-color: #f8d7da;
192
+ color: #842029;
193
+ padding: 1rem;
194
+ border-radius: 6px;
195
+ border: 1px solid #f5c2c7;
196
+ }
197
+
198
+ /* Chat input */
199
+ .stChatInput {
200
+ border-radius: 6px;
201
+ border: 1px solid #ced4da;
202
+ background: #ffffff;
203
+ }
204
+
205
+ /* Button */
206
+ .stButton > button {
207
+ background-color: #0d6efd;
208
+ color: white;
209
+ border-radius: 6px;
210
+ padding: 0.5rem 1.25rem;
211
+ border: none;
212
+ font-weight: 600;
213
+ transition: background-color 0.2s ease;
214
+ }
215
+
216
+ .stButton > button:hover {
217
+ background-color: #0b5ed7;
218
+ }
219
+
220
+ /* Code details styling */
221
+ .code-details {
222
+ background-color: #f8f9fa;
223
+ border: 1px solid #dee2e6;
224
+ border-radius: 8px;
225
+ padding: 10px;
226
+ margin-top: 10px;
227
+ }
228
+
229
+ /* Hide default menu and footer */
230
+ #MainMenu {visibility: hidden;}
231
+ footer {visibility: hidden;}
232
+ header {visibility: hidden;}
233
+
234
+ /* Auto scroll */
235
+ .main-container {
236
+ height: 70vh;
237
+ overflow-y: auto;
238
+ }
239
+ </style>
240
+ """, unsafe_allow_html=True)
241
+
242
+ # Auto-scroll JavaScript
243
+ st.markdown("""
244
+ <script>
245
+ function scrollToBottom() {
246
+ setTimeout(function() {
247
+ const mainContainer = document.querySelector('.main-container');
248
+ if (mainContainer) {
249
+ mainContainer.scrollTop = mainContainer.scrollHeight;
250
+ }
251
+ window.scrollTo(0, document.body.scrollHeight);
252
+ }, 100);
253
+ }
254
+ </script>
255
+ """, unsafe_allow_html=True)
256
+
257
+ # FORCE reload environment variables
258
+ load_dotenv(override=True)
259
+
260
+ # Get API keys
261
+ Groq_Token = os.getenv("GROQ_API_KEY")
262
+ hf_token = os.getenv("HF_TOKEN")
263
+ gemini_token = os.getenv("GEMINI_TOKEN")
264
+
265
+ models = {
266
+ "gpt-oss-20b": "openai/gpt-oss-20b",
267
+ "gpt-oss-120b": "openai/gpt-oss-120b",
268
+ "llama3.1": "llama-3.1-8b-instant",
269
+ "llama3.3": "llama-3.3-70b-versatile",
270
+ "deepseek-R1": "deepseek-r1-distill-llama-70b",
271
+ "llama4 maverik":"meta-llama/llama-4-maverick-17b-128e-instruct",
272
+ "llama4 scout":"meta-llama/llama-4-scout-17b-16e-instruct",
273
+ "gemini-pro": "gemini-1.5-pro"
274
+ }
275
+
276
+ self_path = os.path.dirname(os.path.abspath(__file__))
277
+
278
+ # Initialize session ID for this session
279
+ if "session_id" not in st.session_state:
280
+ st.session_state.session_id = str(uuid.uuid4())
281
+
282
+ def upload_feedback(feedback, error, output, last_prompt, code, status):
283
+ """Enhanced feedback upload function with better logging and error handling"""
284
+ try:
285
+ if not hf_token or hf_token.strip() == "":
286
+ st.warning("⚠️ Cannot upload feedback - HF_TOKEN not available")
287
+ return False
288
+
289
+ # Create comprehensive feedback data
290
+ feedback_data = {
291
+ "timestamp": datetime.now().isoformat(),
292
+ "session_id": st.session_state.session_id,
293
+ "feedback_score": feedback.get("score", ""),
294
+ "feedback_comment": feedback.get("text", ""),
295
+ "user_prompt": last_prompt,
296
+ "ai_output": str(output),
297
+ "generated_code": code or "",
298
+ "error_message": error or "",
299
+ "is_image_output": status.get("is_image", False),
300
+ "success": not bool(error)
301
+ }
302
+
303
+ # Create unique folder name with timestamp
304
+ timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
305
+ random_id = str(uuid.uuid4())[:8]
306
+ folder_name = f"feedback_{timestamp_str}_{random_id}"
307
+
308
+ # Create markdown feedback file
309
+ markdown_content = f"""# VayuChat Feedback Report
310
+
311
+ ## Session Information
312
+ - **Timestamp**: {feedback_data['timestamp']}
313
+ - **Session ID**: {feedback_data['session_id']}
314
+
315
+ ## User Interaction
316
+ **Prompt**: {feedback_data['user_prompt']}
317
+
318
+ ## AI Response
319
+ **Output**: {feedback_data['ai_output']}
320
+
321
+ ## Generated Code
322
+ ```python
323
+ {feedback_data['generated_code']}
324
+ ```
325
+
326
+ ## Technical Details
327
+ - **Error Message**: {feedback_data['error_message']}
328
+ - **Is Image Output**: {feedback_data['is_image_output']}
329
+ - **Success**: {feedback_data['success']}
330
+
331
+ ## User Feedback
332
+ - **Score**: {feedback_data['feedback_score']}
333
+ - **Comments**: {feedback_data['feedback_comment']}
334
+ """
335
+
336
+ # Save markdown file locally
337
+ markdown_filename = f"{folder_name}.md"
338
+ markdown_local_path = f"/tmp/{markdown_filename}"
339
+
340
+ with open(markdown_local_path, "w", encoding="utf-8") as f:
341
+ f.write(markdown_content)
342
+
343
+ # Upload to Hugging Face
344
+ api = HfApi(token=hf_token)
345
+
346
+ # Upload markdown feedback
347
+ api.upload_file(
348
+ path_or_fileobj=markdown_local_path,
349
+ path_in_repo=f"data/{markdown_filename}",
350
+ repo_id="SustainabilityLabIITGN/VayuChat_Feedback",
351
+ repo_type="dataset",
352
+ )
353
+
354
+ # Upload image if it exists and is an image output
355
+ if status.get("is_image", False) and isinstance(output, str) and os.path.exists(output):
356
+ try:
357
+ image_filename = f"{folder_name}_plot.png"
358
+ api.upload_file(
359
+ path_or_fileobj=output,
360
+ path_in_repo=f"data/{image_filename}",
361
+ repo_id="SustainabilityLabIITGN/VayuChat_Feedback",
362
+ repo_type="dataset",
363
+ )
364
+ except Exception as img_error:
365
+ print(f"Error uploading image: {img_error}")
366
+
367
+ # Clean up local files
368
+ if os.path.exists(markdown_local_path):
369
+ os.remove(markdown_local_path)
370
+
371
+ st.success("🎉 Feedback uploaded successfully!")
372
+ return True
373
+
374
+ except Exception as e:
375
+ st.error(f"❌ Error uploading feedback: {e}")
376
+ print(f"Feedback upload error: {e}")
377
+ return False
378
+
379
+ # Beautiful header
380
+ st.markdown("<h1 class='main-title'>🌬️ VayuChat</h1>", unsafe_allow_html=True)
381
+
382
+ st.markdown("""
383
+ <div class='subtitle'>
384
+ <strong>AI-Powered Air Quality Insights</strong><br>
385
+ Simplifying pollution analysis using conversational AI.
386
+ </div>
387
+ """, unsafe_allow_html=True)
388
+
389
+ st.markdown("""
390
+ <div class='instructions'>
391
+ <strong>How to Use:</strong><br>
392
+ Select a model from the sidebar and ask questions directly in the chat. Use quick prompts below for common queries.
393
+ </div>
394
+ """, unsafe_allow_html=True)
395
+
396
+ os.environ["PANDASAI_API_KEY"] = "$2a$10$gbmqKotzJOnqa7iYOun8eO50TxMD/6Zw1pLI2JEoqncwsNx4XeBS2"
397
+
398
+ # Load data with error handling
399
+ try:
400
+ df = preprocess_and_load_df(join(self_path, "Data.csv"))
401
+ st.success("✅ Data loaded successfully!")
402
+ except Exception as e:
403
+ st.error(f"❌ Error loading data: {e}")
404
+ st.stop()
405
+
406
+ inference_server = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
407
+ image_path = "IITGN_Logo.png"
408
+
409
+ # Beautiful sidebar
410
+ with st.sidebar:
411
+ # Logo and title
412
+ col1, col2, col3 = st.columns([1, 2, 1])
413
+ with col2:
414
+ if os.path.exists(image_path):
415
+ st.image(image_path, use_column_width=True)
416
+
417
+ # Session info
418
+ st.markdown(f"**Session ID**: `{st.session_state.session_id[:8]}...`")
419
+
420
+ # Model selection
421
+ st.markdown("### 🤖 AI Model Selection")
422
+
423
+ # Filter available models
424
+ available_models = []
425
+ model_names = list(models.keys())
426
+ groq_models = []
427
+ gemini_models = []
428
+ for model_name in model_names:
429
+ if "gemini" not in model_name:
430
+ groq_models.append(model_name)
431
+ else:
432
+ gemini_models.append(model_name)
433
+ if Groq_Token and Groq_Token.strip():
434
+ available_models.extend(groq_models)
435
+ if gemini_token and gemini_token.strip():
436
+ available_models.extend(gemini_models)
437
+
438
+ if not available_models:
439
+ st.error("❌ No API keys available! Please set up your API keys in the .env file")
440
+ st.stop()
441
+
442
+ model_name = st.selectbox(
443
+ "Choose your AI assistant:",
444
+ available_models,
445
+ help="Different models have different strengths. Try them all!"
446
+ )
447
+
448
+ # Model descriptions
449
+ model_descriptions = {
450
+ "llama3.1": "🦙 Fast and efficient for general queries",
451
+ "llama3.3": "🦙 Most advanced LLaMA model for complex reasoning",
452
+ "mistral": "⚡ Balanced performance and speed",
453
+ "gemma": "💎 Google's lightweight model",
454
+ "gemini-pro": "🧠 Google's most powerful model",
455
+ "gpt-oss-20b": "📘 OpenAI's compact open-weight GPT for everyday tasks",
456
+ "gpt-oss-120b": "📚 OpenAI's massive open-weight GPT for nuanced responses",
457
+ "deepseek-R1": "🔍 DeepSeek's distilled LLaMA model for efficient reasoning",
458
+ "llama4 maverik": "🚀 Meta's LLaMA 4 Maverick — high-performance instruction model",
459
+ "llama4 scout": "🛰️ Meta's LLaMA 4 Scout — optimized for adaptive reasoning"
460
+ }
461
+
462
+
463
+ if model_name in model_descriptions:
464
+ st.info(model_descriptions[model_name])
465
+
466
+ st.markdown("---")
467
+
468
+ # Logging status
469
+ st.markdown("### 📊 Logging Status")
470
+ if hf_token and hf_token.strip():
471
+ st.success("✅ Logging enabled")
472
+ st.caption("Interactions are being logged to HuggingFace")
473
+ else:
474
+ st.warning("⚠️ Logging disabled")
475
+ st.caption("HF_TOKEN not available")
476
+
477
+ st.markdown("---")
478
+
479
+ # Clear Chat Button
480
+ if st.button("🧹 Clear Chat"):
481
+ st.session_state.responses = []
482
+ st.session_state.processing = False
483
+ # Generate new session ID for new chat
484
+ st.session_state.session_id = str(uuid.uuid4())
485
+ try:
486
+ st.rerun()
487
+ except AttributeError:
488
+ st.experimental_rerun()
489
+
490
+ st.markdown("---")
491
+
492
+ # Chat History in Sidebar
493
+ with st.expander("📜 Chat History"):
494
+ for i, response in enumerate(st.session_state.get("responses", [])):
495
+ if response.get("role") == "user":
496
+ st.markdown(f"**You:** {response.get('content', '')[:50]}...")
497
+ elif response.get("role") == "assistant":
498
+ content = response.get('content', '')
499
+ if isinstance(content, str) and len(content) > 50:
500
+ st.markdown(f"**VayuChat:** {content[:50]}...")
501
+ else:
502
+ st.markdown(f"**VayuChat:** {str(content)[:50]}...")
503
+ st.markdown("---")
504
+
505
+ # Load quick prompts
506
+ questions = []
507
+ questions_file = join(self_path, "questions.txt")
508
+ if os.path.exists(questions_file):
509
+ try:
510
+ with open(questions_file, 'r', encoding='utf-8') as f:
511
+ content = f.read()
512
+ questions = [q.strip() for q in content.split("\n") if q.strip()]
513
+ print(f"Loaded {len(questions)} quick prompts") # Debug
514
+ except Exception as e:
515
+ st.error(f"Error loading questions: {e}")
516
+ questions = []
517
+
518
+ # Add some default prompts if file doesn't exist or is empty
519
+ if not questions:
520
+ questions = [
521
+ "What is the average PM2.5 level in the dataset?",
522
+ "Show me the air quality trend over time",
523
+ "Which pollutant has the highest concentration?",
524
+ "Create a correlation plot between different pollutants",
525
+ "What are the peak pollution hours?",
526
+ "Compare weekday vs weekend pollution levels"
527
+ ]
528
+
529
+ # Quick prompts section (horizontal)
530
+ st.markdown("### 💭 Quick Prompts")
531
+
532
+ # Create columns for horizontal layout
533
+ cols_per_row = 2 # Reduced to 2 for better fit
534
+ rows = [questions[i:i + cols_per_row] for i in range(0, len(questions), cols_per_row)]
535
+
536
+ selected_prompt = None
537
+ for row_idx, row in enumerate(rows):
538
+ cols = st.columns(len(row))
539
+ for col_idx, question in enumerate(row):
540
+ with cols[col_idx]:
541
+ # Create unique key using row and column indices
542
+ unique_key = f"prompt_btn_{row_idx}_{col_idx}"
543
+ button_text = f"📝 {question[:35]}{'...' if len(question) > 35 else ''}"
544
+
545
+ if st.button(button_text,
546
+ key=unique_key,
547
+ help=question,
548
+ use_container_width=True):
549
+ selected_prompt = question
550
+
551
+ st.markdown("---")
552
+
553
+ # Initialize chat history and processing state
554
+ if "responses" not in st.session_state:
555
+ st.session_state.responses = []
556
+ if "processing" not in st.session_state:
557
+ st.session_state.processing = False
558
+
559
+ def show_custom_response(response):
560
+ """Custom response display function"""
561
+ role = response.get("role", "assistant")
562
+ content = response.get("content", "")
563
+
564
+ if role == "user":
565
+ st.markdown(f"""
566
+ <div class='user-message'>
567
+ <div class='user-info'>You</div>
568
+ {content}
569
+ </div>
570
+ """, unsafe_allow_html=True)
571
+ elif role == "assistant":
572
+ st.markdown(f"""
573
+ <div class='assistant-message'>
574
+ <div class='assistant-info'>🤖 VayuChat</div>
575
+ {content if isinstance(content, str) else str(content)}
576
+ </div>
577
+ """, unsafe_allow_html=True)
578
+
579
+ # Show generated code if available
580
+ if response.get("gen_code"):
581
+ with st.expander("📋 View Generated Code"):
582
+ st.code(response["gen_code"], language="python")
583
+
584
+ # Try to display image if content is a file path
585
+ try:
586
+ if isinstance(content, str) and (content.endswith('.png') or content.endswith('.jpg')):
587
+ if os.path.exists(content):
588
+ st.image(content)
589
+ return {"is_image": True}
590
+ except:
591
+ pass
592
+
593
+ return {"is_image": False}
594
+
595
+ def show_processing_indicator(model_name, question):
596
+ """Show processing indicator"""
597
+ st.markdown(f"""
598
+ <div class='processing-indicator'>
599
+ <div class='assistant-info'>🤖 VayuChat • Processing with {model_name}</div>
600
+ <strong>Question:</strong> {question}<br>
601
+ <em>🔄 Generating response...</em>
602
+ </div>
603
+ """, unsafe_allow_html=True)
604
+
605
+ # Main chat container
606
+ chat_container = st.container()
607
+
608
+ with chat_container:
609
+ # Display chat history
610
+ for response_id, response in enumerate(st.session_state.responses):
611
+ status = show_custom_response(response)
612
+
613
+ # Show feedback section for assistant responses
614
+ if response["role"] == "assistant":
615
+ feedback_key = f"feedback_{int(response_id/2)}"
616
+ error = response.get("error", "")
617
+ output = response.get("content", "")
618
+ last_prompt = response.get("last_prompt", "")
619
+ code = response.get("gen_code", "")
620
+
621
+ if "feedback" in st.session_state.responses[response_id]:
622
+ feedback_data = st.session_state.responses[response_id]["feedback"]
623
+ st.markdown(f"""
624
+ <div class='feedback-section'>
625
+ <strong>📝 Your Feedback:</strong> {feedback_data.get('score', '')}
626
+ {f"- {feedback_data.get('text', '')}" if feedback_data.get('text') else ""}
627
+ </div>
628
+ """, unsafe_allow_html=True)
629
+ else:
630
+ # Beautiful feedback section
631
+ st.markdown("---")
632
+ st.markdown("**How was this response?**")
633
+
634
+ col1, col2 = st.columns(2)
635
+ with col1:
636
+ thumbs_up = st.button("👍 Helpful", key=f"{feedback_key}_up", use_container_width=True)
637
+ with col2:
638
+ thumbs_down = st.button("👎 Not Helpful", key=f"{feedback_key}_down", use_container_width=True)
639
+
640
+ if thumbs_up or thumbs_down:
641
+ thumbs = "👍 Helpful" if thumbs_up else "👎 Not Helpful"
642
+ comments = st.text_area(
643
+ "💬 Tell us more (optional):",
644
+ key=f"{feedback_key}_comments",
645
+ placeholder="What could be improved? Any suggestions?",
646
+ max_chars=500
647
+ )
648
+
649
+ if st.button("🚀 Submit Feedback", key=f"{feedback_key}_submit"):
650
+ feedback = {"score": thumbs, "text": comments}
651
+
652
+ # Upload feedback with enhanced error handling
653
+ if upload_feedback(feedback, error, output, last_prompt, code, status or {}):
654
+ st.session_state.responses[response_id]["feedback"] = feedback
655
+ time.sleep(1) # Give user time to see success message
656
+ st.rerun()
657
+ else:
658
+ st.error("Failed to submit feedback. Please try again.")
659
+
660
+ # Show processing indicator if processing
661
+ if st.session_state.get("processing"):
662
+ show_processing_indicator(
663
+ st.session_state.get("current_model", "Unknown"),
664
+ st.session_state.get("current_question", "Processing...")
665
+ )
666
+
667
+ # Chat input (always visible at bottom)
668
+ prompt = st.chat_input("💬 Ask me anything about air quality!", key="main_chat")
669
+
670
+ # Handle selected prompt from quick prompts
671
+ if selected_prompt:
672
+ prompt = selected_prompt
673
+
674
+ # Handle new queries
675
+ if prompt and not st.session_state.get("processing"):
676
+ # Prevent duplicate processing
677
+ if "last_prompt" in st.session_state:
678
+ last_prompt = st.session_state["last_prompt"]
679
+ last_model_name = st.session_state.get("last_model_name", "")
680
+ if (prompt == last_prompt) and (model_name == last_model_name):
681
+ prompt = None
682
+
683
+ if prompt:
684
+ # Add user input to chat history
685
+ user_response = get_from_user(prompt)
686
+ st.session_state.responses.append(user_response)
687
+
688
+ # Set processing state
689
+ st.session_state.processing = True
690
+ st.session_state.current_model = model_name
691
+ st.session_state.current_question = prompt
692
+
693
+ # Rerun to show processing indicator
694
+ st.rerun()
695
+
696
+ # Process the question if we're in processing state
697
+
698
+ if st.session_state.get("processing"):
699
+ prompt = st.session_state.get("current_question")
700
+ model_name = st.session_state.get("current_model")
701
+ try:
702
+ from src import SYSTEM_PROMPT
703
+ agent = load_agent(df, SYSTEM_PROMPT, inference_server, name=model_name)
704
+ response = ask_agent(agent, prompt)
705
+ if not isinstance(response, dict):
706
+ response = {
707
+ "role": "assistant",
708
+ "content": "❌ Error: Invalid response format",
709
+ "gen_code": "",
710
+ "ex_code": "",
711
+ "last_prompt": prompt,
712
+ "error": "Invalid response format"
713
+ }
714
+ response.setdefault("role", "assistant")
715
+ response.setdefault("content", "No content generated")
716
+ response.setdefault("gen_code", "")
717
+ response.setdefault("ex_code", "")
718
+ response.setdefault("last_prompt", prompt)
719
+ response.setdefault("error", None)
720
+ except Exception as e:
721
+ response = {
722
+ "role": "assistant",
723
+ "content": f"Sorry, I encountered an error: {str(e)}",
724
+ "gen_code": "",
725
+ "ex_code": "",
726
+ "last_prompt": prompt,
727
+ "error": str(e)
728
+ }
729
+
730
+ st.session_state.responses.append(response)
731
+ st.session_state["last_prompt"] = prompt
732
+ st.session_state["last_model_name"] = model_name
733
+ st.session_state.processing = False
734
+
735
+ # Clear processing state
736
+ if "current_model" in st.session_state:
737
+ del st.session_state.current_model
738
+ if "current_question" in st.session_state:
739
+ del st.session_state.current_question
740
+
741
+ st.rerun()
742
+
743
+ # Auto-scroll to bottom
744
+ if st.session_state.responses:
745
+ st.markdown("<script>scrollToBottom();</script>", unsafe_allow_html=True)
746
+
747
+ # Beautiful sidebar footer
748
+ # with st.sidebar:
749
+ # st.markdown("---")
750
+ # st.markdown("""
751
+ # <div class='contact-section'>
752
+ # <h4>📄 Paper on VayuChat</h4>
753
+ # <p>Learn more about VayuChat in our <a href='https://arxiv.org/abs/2411.12760' target='_blank'>Research Paper</a>.</p>
754
+ # </div>
755
+ # """, unsafe_allow_html=True)
756
+
757
+ # Statistics (if logging is enabled)
758
+ if hf_token and hf_token.strip():
759
+ st.markdown("### 📈 Session Stats")
760
+ total_interactions = len([r for r in st.session_state.get("responses", []) if r.get("role") == "assistant"])
761
+ st.metric("Interactions", total_interactions)
762
+
763
+ feedbacks_given = len([r for r in st.session_state.get("responses", []) if r.get("role") == "assistant" and "feedback" in r])
764
+ st.metric("Feedbacks Given", feedbacks_given)
765
+
766
+ # Footer
767
+ st.markdown("""
768
+ <div style='text-align: center; margin-top: 3rem; padding: 2rem; background: rgba(255,255,255,0.1); border-radius: 15px;'>
769
+ <h3>🌍 Together for Cleaner Air</h3>
770
+ <p>VayuChat - Empowering environmental awareness through AI</p>
771
+ <small>© 2025 IIT Gandhinagar Sustainability Lab</small>
772
+ </div>
 
773
  """, unsafe_allow_html=True)
ncap_funding_data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:389da9b46a0f9f02e2a65d033b2f7610b7b75db18163385def1e4c78b0333ab9
3
+ size 8613
src.py CHANGED
@@ -1,551 +1,572 @@
1
- import os
2
- import pandas as pd
3
- from pandasai import Agent, SmartDataframe
4
- from typing import Tuple
5
- from PIL import Image
6
- from pandasai.llm import HuggingFaceTextGen
7
- from dotenv import load_dotenv
8
- from langchain_groq import ChatGroq
9
- from langchain_google_genai import ChatGoogleGenerativeAI
10
- import matplotlib.pyplot as plt
11
- import json
12
- from datetime import datetime
13
- from huggingface_hub import HfApi
14
- import uuid
15
-
16
- # FORCE reload environment variables
17
- load_dotenv(override=True)
18
-
19
- # Get API keys with explicit None handling and debugging
20
- Groq_Token = os.getenv("GROQ_API_KEY")
21
- hf_token = os.getenv("HF_TOKEN")
22
- gemini_token = os.getenv("GEMINI_TOKEN")
23
-
24
- # Debug print (remove in production)
25
- print(f"Debug - Groq Token: {'Present' if Groq_Token else 'Missing'}")
26
- print(f"Debug - Groq Token Value: {Groq_Token[:10] + '...' if Groq_Token else 'None'}")
27
- print(f"Debug - Gemini Token: {'Present' if gemini_token else 'Missing'}")
28
-
29
- models = {
30
- "gpt-oss-20b": "openai/gpt-oss-20b",
31
- "gpt-oss-120b": "openai/gpt-oss-120b",
32
- "llama3.1": "llama-3.1-8b-instant",
33
- "llama3.3": "llama-3.3-70b-versatile",
34
- "deepseek-R1": "deepseek-r1-distill-llama-70b",
35
- "llama4 maverik":"meta-llama/llama-4-maverick-17b-128e-instruct",
36
- "llama4 scout":"meta-llama/llama-4-scout-17b-16e-instruct",
37
- "gemini-pro": "gemini-1.5-pro"
38
- }
39
-
40
- def log_interaction(user_query, model_name, response_content, generated_code, execution_time, error_message=None, is_image=False):
41
- """Log user interactions to Hugging Face dataset"""
42
- try:
43
- if not hf_token or hf_token.strip() == "":
44
- print("Warning: HF_TOKEN not available, skipping logging")
45
- return
46
-
47
- # Create log entry
48
- log_entry = {
49
- "timestamp": datetime.now().isoformat(),
50
- "session_id": str(uuid.uuid4()),
51
- "user_query": user_query,
52
- "model_name": model_name,
53
- "response_content": str(response_content),
54
- "generated_code": generated_code or "",
55
- "execution_time_seconds": execution_time,
56
- "error_message": error_message or "",
57
- "is_image_output": is_image,
58
- "success": error_message is None
59
- }
60
-
61
- # Create DataFrame
62
- df = pd.DataFrame([log_entry])
63
-
64
- # Create unique filename with timestamp
65
- timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
66
- random_id = str(uuid.uuid4())[:8]
67
- filename = f"interaction_log_{timestamp_str}_{random_id}.parquet"
68
-
69
- # Save locally first
70
- local_path = f"/tmp/{filename}"
71
- df.to_parquet(local_path, index=False)
72
-
73
- # Upload to Hugging Face
74
- api = HfApi(token=hf_token)
75
- api.upload_file(
76
- path_or_fileobj=local_path,
77
- path_in_repo=f"data/{filename}",
78
- repo_id="SustainabilityLabIITGN/VayuChat_logs",
79
- repo_type="dataset",
80
- )
81
-
82
- # Clean up local file
83
- if os.path.exists(local_path):
84
- os.remove(local_path)
85
-
86
- print(f"Successfully logged interaction to HuggingFace: {filename}")
87
-
88
- except Exception as e:
89
- print(f"Error logging interaction: {e}")
90
-
91
- def preprocess_and_load_df(path: str) -> pd.DataFrame:
92
- """Load and preprocess the dataframe"""
93
- try:
94
- df = pd.read_csv(path)
95
- df["Timestamp"] = pd.to_datetime(df["Timestamp"])
96
- return df
97
- except Exception as e:
98
- raise Exception(f"Error loading dataframe: {e}")
99
-
100
- def load_agent(df: pd.DataFrame, context: str, inference_server: str, name="mistral") -> Agent:
101
- """Load pandas AI agent with error handling"""
102
- try:
103
- if name == "gemini-pro":
104
- if not gemini_token or gemini_token.strip() == "":
105
- raise ValueError("Gemini API token not available or empty")
106
- llm = ChatGoogleGenerativeAI(
107
- model=models[name],
108
- google_api_key=gemini_token,
109
- temperature=0.1
110
- )
111
- else:
112
- if not Groq_Token or Groq_Token.strip() == "":
113
- raise ValueError("Groq API token not available or empty")
114
- llm = ChatGroq(
115
- model=models[name],
116
- api_key=Groq_Token,
117
- temperature=0.1
118
- )
119
-
120
- agent = Agent(df, config={"llm": llm, "enable_cache": False, "options": {"wait_for_model": True}})
121
- if context:
122
- agent.add_message(context)
123
- return agent
124
- except Exception as e:
125
- raise Exception(f"Error loading agent: {e}")
126
-
127
- def load_smart_df(df: pd.DataFrame, inference_server: str, name="mistral") -> SmartDataframe:
128
- """Load smart dataframe with error handling"""
129
- try:
130
- if name == "gemini-pro":
131
- if not gemini_token or gemini_token.strip() == "":
132
- raise ValueError("Gemini API token not available or empty")
133
- llm = ChatGoogleGenerativeAI(
134
- model=models[name],
135
- google_api_key=gemini_token,
136
- temperature=0.1
137
- )
138
- else:
139
- if not Groq_Token or Groq_Token.strip() == "":
140
- raise ValueError("Groq API token not available or empty")
141
- llm = ChatGroq(
142
- model=models[name],
143
- api_key=Groq_Token,
144
- temperature=0.1
145
- )
146
-
147
- df = SmartDataframe(df, config={"llm": llm, "max_retries": 5, "enable_cache": False})
148
- return df
149
- except Exception as e:
150
- raise Exception(f"Error loading smart dataframe: {e}")
151
-
152
- def get_from_user(prompt):
153
- """Format user prompt"""
154
- return {"role": "user", "content": prompt}
155
-
156
- def ask_agent(agent: Agent, prompt: str) -> dict:
157
- """Ask agent with comprehensive error handling"""
158
- start_time = datetime.now()
159
- try:
160
- response = agent.chat(prompt)
161
- execution_time = (datetime.now() - start_time).total_seconds()
162
-
163
- gen_code = getattr(agent, 'last_code_generated', '')
164
- ex_code = getattr(agent, 'last_code_executed', '')
165
- last_prompt = getattr(agent, 'last_prompt', prompt)
166
-
167
- # Log the interaction
168
- log_interaction(
169
- user_query=prompt,
170
- model_name="pandas_ai_agent",
171
- response_content=response,
172
- generated_code=gen_code,
173
- execution_time=execution_time,
174
- error_message=None,
175
- is_image=isinstance(response, str) and any(response.endswith(ext) for ext in ['.png', '.jpg', '.jpeg'])
176
- )
177
-
178
- return {
179
- "role": "assistant",
180
- "content": response,
181
- "gen_code": gen_code,
182
- "ex_code": ex_code,
183
- "last_prompt": last_prompt,
184
- "error": None
185
- }
186
- except Exception as e:
187
- execution_time = (datetime.now() - start_time).total_seconds()
188
- error_msg = str(e)
189
-
190
- # Log the failed interaction
191
- log_interaction(
192
- user_query=prompt,
193
- model_name="pandas_ai_agent",
194
- response_content=f"Error: {error_msg}",
195
- generated_code="",
196
- execution_time=execution_time,
197
- error_message=error_msg,
198
- is_image=False
199
- )
200
-
201
- return {
202
- "role": "assistant",
203
- "content": f"Error: {error_msg}",
204
- "gen_code": "",
205
- "ex_code": "",
206
- "last_prompt": prompt,
207
- "error": error_msg
208
- }
209
-
210
- def decorate_with_code(response: dict) -> str:
211
- """Decorate response with code details"""
212
- gen_code = response.get("gen_code", "No code generated")
213
- last_prompt = response.get("last_prompt", "No prompt")
214
-
215
- return f"""<details>
216
- <summary>Generated Code</summary>
217
-
218
- ```python
219
- {gen_code}
220
- ```
221
- </details>
222
-
223
- <details>
224
- <summary>Prompt</summary>
225
-
226
- {last_prompt}
227
- """
228
-
229
- def show_response(st, response):
230
- """Display response with error handling"""
231
- try:
232
- with st.chat_message(response["role"]):
233
- content = response.get("content", "No content")
234
-
235
- try:
236
- # Try to open as image
237
- image = Image.open(content)
238
- if response.get("gen_code"):
239
- st.markdown(decorate_with_code(response), unsafe_allow_html=True)
240
- st.image(image)
241
- return {"is_image": True}
242
- except:
243
- # Not an image, display as text
244
- if response.get("gen_code"):
245
- display_content = decorate_with_code(response) + f"""</details>
246
-
247
- {content}"""
248
- else:
249
- display_content = content
250
- st.markdown(display_content, unsafe_allow_html=True)
251
- return {"is_image": False}
252
- except Exception as e:
253
- st.error(f"Error displaying response: {e}")
254
- return {"is_image": False}
255
-
256
- def ask_question(model_name, question):
257
- """Ask question with comprehensive error handling and logging"""
258
- start_time = datetime.now()
259
- try:
260
- # Reload environment variables to get fresh values
261
- load_dotenv(override=True)
262
- fresh_groq_token = os.getenv("GROQ_API_KEY")
263
- fresh_gemini_token = os.getenv("GEMINI_TOKEN")
264
-
265
- print(f"ask_question - Fresh Groq Token: {'Present' if fresh_groq_token else 'Missing'}")
266
-
267
- # Check API availability with fresh tokens
268
- if model_name == "gemini-pro":
269
- if not fresh_gemini_token or fresh_gemini_token.strip() == "":
270
- execution_time = (datetime.now() - start_time).total_seconds()
271
- error_msg = "Missing or empty API token"
272
-
273
- # Log the failed interaction
274
- log_interaction(
275
- user_query=question,
276
- model_name=model_name,
277
- response_content="❌ Gemini API token not available or empty",
278
- generated_code="",
279
- execution_time=execution_time,
280
- error_message=error_msg,
281
- is_image=False
282
- )
283
-
284
- return {
285
- "role": "assistant",
286
- "content": "❌ Gemini API token not available or empty. Please set GEMINI_TOKEN in your environment variables.",
287
- "gen_code": "",
288
- "ex_code": "",
289
- "last_prompt": question,
290
- "error": error_msg
291
- }
292
- llm = ChatGoogleGenerativeAI(
293
- model=models[model_name],
294
- google_api_key=fresh_gemini_token,
295
- temperature=0
296
- )
297
- else:
298
- if not fresh_groq_token or fresh_groq_token.strip() == "":
299
- execution_time = (datetime.now() - start_time).total_seconds()
300
- error_msg = "Missing or empty API token"
301
-
302
- # Log the failed interaction
303
- log_interaction(
304
- user_query=question,
305
- model_name=model_name,
306
- response_content=" Groq API token not available or empty",
307
- generated_code="",
308
- execution_time=execution_time,
309
- error_message=error_msg,
310
- is_image=False
311
- )
312
-
313
- return {
314
- "role": "assistant",
315
- "content": "❌ Groq API token not available or empty. Please set GROQ_API_KEY in your environment variables and restart the application.",
316
- "gen_code": "",
317
- "ex_code": "",
318
- "last_prompt": question,
319
- "error": error_msg
320
- }
321
-
322
- # Test the API key by trying to create the client
323
- try:
324
- llm = ChatGroq(
325
- model=models[model_name],
326
- api_key=fresh_groq_token,
327
- temperature=0.1
328
- )
329
- # Test with a simple call to verify the API key works
330
- test_response = llm.invoke("Test")
331
- print("API key test successful")
332
- except Exception as api_error:
333
- execution_time = (datetime.now() - start_time).total_seconds()
334
- error_msg = str(api_error)
335
-
336
- if "organization_restricted" in error_msg.lower() or "unauthorized" in error_msg.lower():
337
- response_content = "❌ API Key Error: Your Groq API key appears to be invalid, expired, or restricted. Please check your API key in the .env file."
338
- log_error_msg = f"API key validation failed: {error_msg}"
339
- else:
340
- response_content = f"❌ API Connection Error: {error_msg}"
341
- log_error_msg = error_msg
342
-
343
- # Log the failed interaction
344
- log_interaction(
345
- user_query=question,
346
- model_name=model_name,
347
- response_content=response_content,
348
- generated_code="",
349
- execution_time=execution_time,
350
- error_message=log_error_msg,
351
- is_image=False
352
- )
353
-
354
- return {
355
- "role": "assistant",
356
- "content": response_content,
357
- "gen_code": "",
358
- "ex_code": "",
359
- "last_prompt": question,
360
- "error": log_error_msg
361
- }
362
-
363
- # Check if data file exists
364
- if not os.path.exists("Data.csv"):
365
- execution_time = (datetime.now() - start_time).total_seconds()
366
- error_msg = "Data file not found"
367
-
368
- # Log the failed interaction
369
- log_interaction(
370
- user_query=question,
371
- model_name=model_name,
372
- response_content="❌ Data.csv file not found",
373
- generated_code="",
374
- execution_time=execution_time,
375
- error_message=error_msg,
376
- is_image=False
377
- )
378
-
379
- return {
380
- "role": "assistant",
381
- "content": "❌ Data.csv file not found. Please ensure the data file is in the correct location.",
382
- "gen_code": "",
383
- "ex_code": "",
384
- "last_prompt": question,
385
- "error": error_msg
386
- }
387
-
388
- df_check = pd.read_csv("Data.csv")
389
- df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
390
- df_check = df_check.head(5)
391
-
392
- new_line = "\n"
393
- parameters = {"font.size": 12, "figure.dpi": 600}
394
-
395
- template = f"""```python
396
- import pandas as pd
397
- import matplotlib.pyplot as plt
398
- import uuid
399
-
400
- plt.rcParams.update({parameters})
401
-
402
- df = pd.read_csv("Data.csv")
403
- df["Timestamp"] = pd.to_datetime(df["Timestamp"])
404
-
405
- # Available columns and data types:
406
- {new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
407
-
408
- # Question: {question.strip()}
409
- # Generate code to answer the question and save result in 'answer' variable
410
- # If creating a plot, save it with a unique filename and store the filename in 'answer'
411
- # If returning text/numbers, store the result directly in 'answer'
412
- ```"""
413
-
414
- system_prompt = """You are a helpful assistant that generates Python code for data analysis.
415
-
416
- Rules:
417
- 1. Always save your final result in a variable called 'answer'
418
- 2. If creating a plot, save it with plt.savefig() and store the filename in 'answer'
419
- 3. If returning text/numbers, store the result directly in 'answer'
420
- 4. Use descriptive variable names and add comments
421
- 5. Handle potential errors gracefully
422
- 6. For plots, use unique filenames to avoid conflicts
423
- """
424
-
425
- query = f"""{system_prompt}
426
-
427
- Complete the following code to answer the user's question:
428
-
429
- {template}
430
- """
431
-
432
- # Make API call
433
- if model_name == "gemini-pro":
434
- response = llm.invoke(query)
435
- answer = response.content
436
- else:
437
- response = llm.invoke(query)
438
- answer = response.content
439
-
440
- # Extract and execute code
441
- try:
442
- if "```python" in answer:
443
- code_part = answer.split("```python")[1].split("```")[0]
444
- else:
445
- code_part = answer
446
-
447
- full_code = f"""
448
- {template.split("```python")[1].split("```")[0]}
449
- {code_part}
450
- """
451
-
452
- # Execute code in a controlled environment
453
- local_vars = {}
454
- global_vars = {
455
- 'pd': pd,
456
- 'plt': plt,
457
- 'os': os,
458
- 'uuid': __import__('uuid')
459
- }
460
-
461
- exec(full_code, global_vars, local_vars)
462
-
463
- # Get the answer
464
- if 'answer' in local_vars:
465
- answer_result = local_vars['answer']
466
- else:
467
- answer_result = "No answer variable found in generated code"
468
-
469
- execution_time = (datetime.now() - start_time).total_seconds()
470
-
471
- # Determine if output is an image
472
- is_image = isinstance(answer_result, str) and any(answer_result.endswith(ext) for ext in ['.png', '.jpg', '.jpeg'])
473
-
474
- # Log successful interaction
475
- log_interaction(
476
- user_query=question,
477
- model_name=model_name,
478
- response_content=str(answer_result),
479
- generated_code=full_code,
480
- execution_time=execution_time,
481
- error_message=None,
482
- is_image=is_image
483
- )
484
-
485
- return {
486
- "role": "assistant",
487
- "content": answer_result,
488
- "gen_code": full_code,
489
- "ex_code": full_code,
490
- "last_prompt": question,
491
- "error": None
492
- }
493
-
494
- except Exception as code_error:
495
- execution_time = (datetime.now() - start_time).total_seconds()
496
- error_msg = str(code_error)
497
-
498
- # Log the failed code execution
499
- log_interaction(
500
- user_query=question,
501
- model_name=model_name,
502
- response_content=f"❌ Error executing generated code: {error_msg}",
503
- generated_code=full_code if 'full_code' in locals() else "",
504
- execution_time=execution_time,
505
- error_message=error_msg,
506
- is_image=False
507
- )
508
-
509
- return {
510
- "role": "assistant",
511
- "content": f"❌ Error executing generated code: {error_msg}",
512
- "gen_code": full_code if 'full_code' in locals() else "",
513
- "ex_code": full_code if 'full_code' in locals() else "",
514
- "last_prompt": question,
515
- "error": error_msg
516
- }
517
-
518
- except Exception as e:
519
- execution_time = (datetime.now() - start_time).total_seconds()
520
- error_msg = str(e)
521
-
522
- # Handle specific API errors
523
- if "organization_restricted" in error_msg:
524
- response_content = "❌ API Organization Restricted: Your API key access has been restricted. Please check your Groq API key or try generating a new one."
525
- log_error_msg = "API access restricted"
526
- elif "rate_limit" in error_msg.lower():
527
- response_content = "❌ Rate limit exceeded. Please wait a moment and try again."
528
- log_error_msg = "Rate limit exceeded"
529
- else:
530
- response_content = f"❌ Error: {error_msg}"
531
- log_error_msg = error_msg
532
-
533
- # Log the failed interaction
534
- log_interaction(
535
- user_query=question,
536
- model_name=model_name,
537
- response_content=response_content,
538
- generated_code="",
539
- execution_time=execution_time,
540
- error_message=log_error_msg,
541
- is_image=False
542
- )
543
-
544
- return {
545
- "role": "assistant",
546
- "content": response_content,
547
- "gen_code": "",
548
- "ex_code": "",
549
- "last_prompt": question,
550
- "error": log_error_msg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  }
 
1
# Prompt text describing the three dataframes (df, ncap_data, states_data) an
# LLM is told it can use, plus reporting rules (units, std dev, std error, n).
# NOTE(review): nothing in the functions below references SYSTEM_PROMPT, and
# ask_question builds its own local system prompt that only loads Data.csv;
# presumably this constant is consumed by a caller - confirm, and confirm that
# ncap_data / states_data are actually loaded wherever it is used.
SYSTEM_PROMPT = """
You have access to three pandas dataframes:

1. Air Quality Data (df):
- Columns: 'Timestamp', 'station', 'PM2.5', 'PM10', 'address', 'city', 'latitude', 'longitude', 'state'
- Example row: ['2023-01-01', 'StationA', 45.67, 78.9, '123 Main St', 'Mumbai', 19.07, 72.87, 'Maharashtra']
- Frequency: daily
- 'pollution' generally means 'PM2.5'.
- PM2.5 guidelines: India: 60, WHO: 15. PM10 guidelines: India: 100, WHO: 50.

2. NCAP Funding Data (ncap_data):
- Columns: 'city', 'state', 'funding_received', 'year', 'project', 'status'
- Example row: ['Mumbai', 'Maharashtra', 10000000, 2022, 'Clean Air Project', 'Ongoing']

3. State Population Data (states_data):
- Columns: 'state', 'population', 'year', 'urban_population', 'rural_population'
- Example row: ['Maharashtra', 123000000, 2021, 60000000, 63000000]

You already have these dataframes loaded as df, ncap_data, and states_data. Do not read any files. Use these dataframes to answer questions about air quality, funding, or population. When aggregating, report standard deviation, standard error, and number of data points. Always report units. If a plot is required, follow the previous instructions for saving and reporting plots. If a question is about funding or population, use the relevant dataframe.
"""
21
+
22
+ import os
23
+ import pandas as pd
24
+ from pandasai import Agent, SmartDataframe
25
+ from typing import Tuple
26
+ from PIL import Image
27
+ from pandasai.llm import HuggingFaceTextGen
28
+ from dotenv import load_dotenv
29
+ from langchain_groq import ChatGroq
30
+ from langchain_google_genai import ChatGoogleGenerativeAI
31
+ import matplotlib.pyplot as plt
32
+ import json
33
+ from datetime import datetime
34
+ from huggingface_hub import HfApi
35
+ import uuid
36
+
37
# FORCE reload environment variables so values from .env replace any that are
# already set in the process environment.
load_dotenv(override=True)

# API credentials; each is None when the corresponding variable is unset.
Groq_Token = os.getenv("GROQ_API_KEY")
hf_token = os.getenv("HF_TOKEN")
gemini_token = os.getenv("GEMINI_TOKEN")

# Debug print (remove in production). Only presence is reported: echoing even
# a prefix of an API key to stdout leaks part of the secret into server logs.
print(f"Debug - Groq Token: {'Present' if Groq_Token else 'Missing'}")
print(f"Debug - Gemini Token: {'Present' if gemini_token else 'Missing'}")
49
+
50
# Maps the short model names accepted by load_agent / load_smart_df /
# ask_question to the provider-side model identifier.
# "gemini-pro" is sent to ChatGoogleGenerativeAI; every other key is sent to
# ChatGroq by those functions.
# NOTE: the keys are looked up at runtime, so spellings such as
# "llama4 maverik" must stay in sync with whatever the callers pass in.
models = {
    "gpt-oss-20b": "openai/gpt-oss-20b",
    "gpt-oss-120b": "openai/gpt-oss-120b",
    "llama3.1": "llama-3.1-8b-instant",
    "llama3.3": "llama-3.3-70b-versatile",
    "deepseek-R1": "deepseek-r1-distill-llama-70b",
    "llama4 maverik":"meta-llama/llama-4-maverick-17b-128e-instruct",
    "llama4 scout":"meta-llama/llama-4-scout-17b-16e-instruct",
    "gemini-pro": "gemini-1.5-pro"
}
60
+
61
def log_interaction(user_query, model_name, response_content, generated_code, execution_time, error_message=None, is_image=False):
    """Log one user interaction as a single-row parquet file in a Hugging Face dataset.

    Logging is best-effort: any failure is printed and swallowed so that it
    never interrupts the chat flow. Nothing is logged when HF_TOKEN is unset
    or blank.

    Args:
        user_query: The question the user asked.
        model_name: Name of the model that handled the query.
        response_content: The response shown to the user (stringified here).
        generated_code: Code produced by the model; None is stored as "".
        execution_time: Elapsed time in seconds.
        error_message: Error text for failed interactions; None marks success.
        is_image: Whether the response was an image file.

    Returns:
        None.
    """
    import tempfile  # local import keeps this block self-contained

    local_path = None
    try:
        if not hf_token or hf_token.strip() == "":
            print("Warning: HF_TOKEN not available, skipping logging")
            return

        # NOTE: session_id is a fresh UUID per call, so it identifies the log
        # entry rather than a user session.
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "session_id": str(uuid.uuid4()),
            "user_query": user_query,
            "model_name": model_name,
            "response_content": str(response_content),
            "generated_code": generated_code or "",
            "execution_time_seconds": execution_time,
            "error_message": error_message or "",
            "is_image_output": is_image,
            "success": error_message is None,
        }
        log_frame = pd.DataFrame([log_entry])

        # Timestamp + random suffix keeps filenames unique across concurrent calls.
        timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        random_id = str(uuid.uuid4())[:8]
        filename = f"interaction_log_{timestamp_str}_{random_id}.parquet"

        # Stage the file in the platform temp directory (not a hard-coded /tmp).
        local_path = os.path.join(tempfile.gettempdir(), filename)
        log_frame.to_parquet(local_path, index=False)

        api = HfApi(token=hf_token)
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=f"data/{filename}",
            repo_id="SustainabilityLabIITGN/VayuChat_logs",
            repo_type="dataset",
        )

        print(f"Successfully logged interaction to HuggingFace: {filename}")

    except Exception as e:
        print(f"Error logging interaction: {e}")
    finally:
        # Always remove the staged file, including when the upload failed.
        if local_path and os.path.exists(local_path):
            try:
                os.remove(local_path)
            except OSError as cleanup_error:
                print(f"Error logging interaction: {cleanup_error}")
111
+
112
def preprocess_and_load_df(path: str) -> pd.DataFrame:
    """Load a CSV file and parse its "Timestamp" column as datetimes.

    Args:
        path: Path of the CSV file to read.

    Returns:
        The loaded dataframe with "Timestamp" converted via pd.to_datetime.

    Raises:
        Exception: If reading or parsing fails; the original error is
            attached as ``__cause__``.
    """
    try:
        df = pd.read_csv(path)
        df["Timestamp"] = pd.to_datetime(df["Timestamp"])
        return df
    except Exception as e:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise Exception(f"Error loading dataframe: {e}") from e
120
+
121
def load_agent(df: pd.DataFrame, context: str, inference_server: str, name="mistral") -> Agent:
    """Build a pandasai Agent over *df*, backed by the LLM selected by *name*.

    Args:
        df: Dataframe the agent will answer questions about.
        context: Optional message added to the agent's conversation; skipped
            when falsy.
        inference_server: Unused; kept so existing callers keep working.
        name: Key into the module-level ``models`` mapping. "gemini-pro"
            selects ChatGoogleGenerativeAI, any other key selects ChatGroq.
            NOTE: the default "mistral" is not a key of ``models``, so
            callers must pass a valid name.

    Returns:
        The configured Agent.

    Raises:
        Exception: On unknown model name, missing API token, or any
            construction failure; the original error is attached as
            ``__cause__``.
    """
    try:
        if name not in models:
            # Fail with a readable message instead of a bare KeyError text.
            raise ValueError(f"Unknown model name {name!r}; expected one of {sorted(models)}")

        if name == "gemini-pro":
            if not gemini_token or gemini_token.strip() == "":
                raise ValueError("Gemini API token not available or empty")
            llm = ChatGoogleGenerativeAI(
                model=models[name],
                google_api_key=gemini_token,
                temperature=0.1
            )
        else:
            if not Groq_Token or Groq_Token.strip() == "":
                raise ValueError("Groq API token not available or empty")
            llm = ChatGroq(
                model=models[name],
                api_key=Groq_Token,
                temperature=0.1
            )

        agent = Agent(df, config={"llm": llm, "enable_cache": False, "options": {"wait_for_model": True}})
        if context:
            agent.add_message(context)
        return agent
    except Exception as e:
        raise Exception(f"Error loading agent: {e}") from e
147
+
148
def load_smart_df(df: pd.DataFrame, inference_server: str, name="mistral") -> SmartDataframe:
    """Wrap *df* in a pandasai SmartDataframe backed by the LLM selected by *name*.

    Args:
        df: Dataframe to wrap.
        inference_server: Unused; kept so existing callers keep working.
        name: Key into the module-level ``models`` mapping. "gemini-pro"
            selects ChatGoogleGenerativeAI, any other key selects ChatGroq.
            NOTE: the default "mistral" is not a key of ``models``, so
            callers must pass a valid name.

    Returns:
        A SmartDataframe configured with max_retries=5 and caching disabled.

    Raises:
        Exception: On unknown model name, missing API token, or any
            construction failure; the original error is attached as
            ``__cause__``.
    """
    try:
        if name not in models:
            # Fail with a readable message instead of a bare KeyError text.
            raise ValueError(f"Unknown model name {name!r}; expected one of {sorted(models)}")

        if name == "gemini-pro":
            if not gemini_token or gemini_token.strip() == "":
                raise ValueError("Gemini API token not available or empty")
            llm = ChatGoogleGenerativeAI(
                model=models[name],
                google_api_key=gemini_token,
                temperature=0.1
            )
        else:
            if not Groq_Token or Groq_Token.strip() == "":
                raise ValueError("Groq API token not available or empty")
            llm = ChatGroq(
                model=models[name],
                api_key=Groq_Token,
                temperature=0.1
            )

        smart_df = SmartDataframe(df, config={"llm": llm, "max_retries": 5, "enable_cache": False})
        return smart_df
    except Exception as e:
        raise Exception(f"Error loading smart dataframe: {e}") from e
172
+
173
def get_from_user(prompt):
    """Wrap raw user text in a chat-message dict with the "user" role."""
    message = dict(role="user", content=prompt)
    return message
176
+
177
def ask_agent(agent: Agent, prompt: str) -> dict:
    """Send *prompt* to a pandasai agent, log the interaction, and return a chat message.

    Returns:
        A dict with keys "role", "content", "gen_code", "ex_code",
        "last_prompt" and "error". On failure "content" carries the error
        text and "error" holds the exception message; on success "error"
        is None.
    """
    started = datetime.now()
    image_suffixes = ('.png', '.jpg', '.jpeg')
    try:
        reply = agent.chat(prompt)
        elapsed = (datetime.now() - started).total_seconds()

        # Agent attributes may be absent, so fall back to neutral defaults.
        result = {
            "role": "assistant",
            "content": reply,
            "gen_code": getattr(agent, 'last_code_generated', ''),
            "ex_code": getattr(agent, 'last_code_executed', ''),
            "last_prompt": getattr(agent, 'last_prompt', prompt),
            "error": None
        }

        # Log the interaction
        log_interaction(
            user_query=prompt,
            model_name="pandas_ai_agent",
            response_content=reply,
            generated_code=result["gen_code"],
            execution_time=elapsed,
            error_message=None,
            is_image=isinstance(reply, str) and reply.endswith(image_suffixes)
        )
        return result
    except Exception as e:
        elapsed = (datetime.now() - started).total_seconds()
        failure_text = str(e)

        # Log the failed interaction
        log_interaction(
            user_query=prompt,
            model_name="pandas_ai_agent",
            response_content=f"Error: {failure_text}",
            generated_code="",
            execution_time=elapsed,
            error_message=failure_text,
            is_image=False
        )

        return {
            "role": "assistant",
            "content": f"Error: {failure_text}",
            "gen_code": "",
            "ex_code": "",
            "last_prompt": prompt,
            "error": failure_text
        }
230
+
231
def decorate_with_code(response: dict) -> str:
    """Render the generated code and the prompt of *response* as collapsible HTML.

    The second <details> element (the prompt) is intentionally left open:
    the caller appends the closing tag.
    """
    code_text = response.get("gen_code", "No code generated")
    prompt_text = response.get("last_prompt", "No prompt")

    pieces = [
        "<details>",
        "<summary>Generated Code</summary>",
        "",
        "```python",
        f"{code_text}",
        "```",
        "</details>",
        "",
        "<details>",
        "<summary>Prompt</summary>",
        "",
        f"{prompt_text}",
        "",
    ]
    return "\n".join(pieces)
249
+
250
def show_response(st, response):
    """Render an assistant/user message in the Streamlit chat UI.

    The message content is first probed as an image path; if it opens, the
    image is shown, otherwise the content is rendered as markdown. When the
    message carries generated code, the collapsible code/prompt section from
    decorate_with_code is rendered too.

    Args:
        st: The streamlit module (or a compatible object).
        response: Message dict with at least "role"; "content" and
            "gen_code" are optional.

    Returns:
        {"is_image": True} when an image was displayed, otherwise
        {"is_image": False} (also on any display error).
    """
    try:
        with st.chat_message(response["role"]):
            content = response.get("content", "No content")
            has_code = bool(response.get("gen_code"))

            # Probe only: anything that is not an openable image path
            # (plain text, numbers, dataframes, missing files) is text.
            # `except Exception` lets KeyboardInterrupt/SystemExit propagate.
            try:
                image = Image.open(content)
            except Exception:
                image = None

            if image is not None:
                if has_code:
                    # decorate_with_code leaves its last <details> open; close it
                    # here exactly as the text branch does.
                    st.markdown(decorate_with_code(response) + "</details>", unsafe_allow_html=True)
                st.image(image)
                return {"is_image": True}

            # Not an image, display as text
            if has_code:
                display_content = decorate_with_code(response) + f"""</details>

{content}"""
            else:
                display_content = content
            st.markdown(display_content, unsafe_allow_html=True)
            return {"is_image": False}
    except Exception as e:
        st.error(f"Error displaying response: {e}")
        return {"is_image": False}
276
+
277
def _log_and_build_failure(question, model_name, start_time, log_content, user_content, error, generated_code=""):
    """Log a failed interaction and return the assistant message dict describing it."""
    execution_time = (datetime.now() - start_time).total_seconds()
    log_interaction(
        user_query=question,
        model_name=model_name,
        response_content=log_content,
        generated_code=generated_code,
        execution_time=execution_time,
        error_message=error,
        is_image=False
    )
    return {
        "role": "assistant",
        "content": user_content,
        "gen_code": generated_code,
        "ex_code": generated_code,
        "last_prompt": question,
        "error": error
    }


def ask_question(model_name, question):
    """Ask an LLM to write analysis code for *question*, run it, and return the result.

    Steps: reload API tokens, build the LLM client ("gemini-pro" uses
    ChatGoogleGenerativeAI, every other name uses ChatGroq and is verified
    with a test call), build a code template around Data.csv, ask the model
    to complete it, execute the completed code, and read its ``answer``
    variable. Every outcome is recorded through log_interaction.

    Args:
        model_name: Key into the module-level ``models`` mapping.
        question: The user's natural-language question.

    Returns:
        A dict with keys "role", "content", "gen_code", "ex_code",
        "last_prompt" and "error". "content" is the value of ``answer``
        (text/number, or an image filename for plots) on success, or a
        user-facing error message on failure; "error" is None on success.
        This function does not raise: all failures are reported in the dict.
    """
    start_time = datetime.now()
    try:
        # Reload environment variables to get fresh values
        load_dotenv(override=True)
        fresh_groq_token = os.getenv("GROQ_API_KEY")
        fresh_gemini_token = os.getenv("GEMINI_TOKEN")

        print(f"ask_question - Fresh Groq Token: {'Present' if fresh_groq_token else 'Missing'}")

        # Check API availability with fresh tokens
        if model_name == "gemini-pro":
            if not fresh_gemini_token or fresh_gemini_token.strip() == "":
                return _log_and_build_failure(
                    question, model_name, start_time,
                    log_content="❌ Gemini API token not available or empty",
                    user_content="❌ Gemini API token not available or empty. Please set GEMINI_TOKEN in your environment variables.",
                    error="Missing or empty API token",
                )
            llm = ChatGoogleGenerativeAI(
                model=models[model_name],
                google_api_key=fresh_gemini_token,
                temperature=0
            )
        else:
            if not fresh_groq_token or fresh_groq_token.strip() == "":
                return _log_and_build_failure(
                    question, model_name, start_time,
                    log_content="❌ Groq API token not available or empty",
                    user_content="❌ Groq API token not available or empty. Please set GROQ_API_KEY in your environment variables and restart the application.",
                    error="Missing or empty API token",
                )

            # Test the API key by trying to create the client
            try:
                llm = ChatGroq(
                    model=models[model_name],
                    api_key=fresh_groq_token,
                    temperature=0.1
                )
                # Test with a simple call to verify the API key works
                llm.invoke("Test")
                print("API key test successful")
            except Exception as api_error:
                error_msg = str(api_error)
                lowered = error_msg.lower()

                if "organization_restricted" in lowered or "unauthorized" in lowered:
                    response_content = "❌ API Key Error: Your Groq API key appears to be invalid, expired, or restricted. Please check your API key in the .env file."
                    log_error_msg = f"API key validation failed: {error_msg}"
                else:
                    response_content = f"❌ API Connection Error: {error_msg}"
                    log_error_msg = error_msg

                return _log_and_build_failure(
                    question, model_name, start_time,
                    log_content=response_content,
                    user_content=response_content,
                    error=log_error_msg,
                )

        # Check if data file exists
        if not os.path.exists("Data.csv"):
            return _log_and_build_failure(
                question, model_name, start_time,
                log_content="❌ Data.csv file not found",
                user_content="Data.csv file not found. Please ensure the data file is in the correct location.",
                error="Data file not found",
            )

        df_check = pd.read_csv("Data.csv")
        df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
        df_check = df_check.head(5)

        parameters = {"font.size": 12, "figure.dpi": 600}

        # One "# <column> <dtype>" comment line per column.
        dtype_comments = "\n".join("# " + line for line in str(df_check.dtypes).split("\n"))
        # Keep every line of a multi-line question inside a comment; otherwise
        # the extra lines would become part of the code that is executed below.
        question_comment = "\n# ".join(question.strip().splitlines())

        # The preamble is both shown to the model (inside a code fence) and
        # executed ahead of the model's code, so it is built once and reused.
        preamble = f"""
import pandas as pd
import matplotlib.pyplot as plt
import uuid

plt.rcParams.update({parameters})

df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

# Available columns and data types:
{dtype_comments}

# Question: {question_comment}
# Generate code to answer the question and save result in 'answer' variable
# If creating a plot, save it with a unique filename and store the filename in 'answer'
# If returning text/numbers, store the result directly in 'answer'
"""
        template = f"```python{preamble}```"

        system_prompt = """You are a helpful assistant that generates Python code for data analysis.

Rules:
1. Always save your final result in a variable called 'answer'
2. If creating a plot, save it with plt.savefig() and store the filename in 'answer'
3. If returning text/numbers, store the result directly in 'answer'
4. Use descriptive variable names and add comments
5. Handle potential errors gracefully
6. For plots, use unique filenames to avoid conflicts
"""

        query = f"""{system_prompt}

Complete the following code to answer the user's question:

{template}
"""

        # Make API call (both client types expose the same invoke/content API)
        response = llm.invoke(query)
        answer = response.content

        # Extract and execute code
        full_code = ""
        try:
            if "```python" in answer:
                code_part = answer.split("```python")[1].split("```")[0]
            else:
                code_part = answer

            full_code = f"\n{preamble}\n{code_part}\n"

            # SECURITY: this executes model-generated code with the full
            # privileges of the server process (including `os`). It is not a
            # sandbox; treat questions as untrusted input and isolate the
            # process accordingly.
            #
            # A single namespace is used on purpose: with separate
            # globals/locals dicts, functions, lambdas and comprehensions in
            # the generated code cannot see top-level names such as `df`.
            namespace = {
                'pd': pd,
                'plt': plt,
                'os': os,
                'uuid': uuid
            }
            try:
                exec(full_code, namespace)
            finally:
                # Saved plots live on disk; release the in-memory figures so
                # they do not accumulate across requests.
                plt.close("all")

            # Get the answer
            if 'answer' in namespace:
                answer_result = namespace['answer']
            else:
                answer_result = "No answer variable found in generated code"

            execution_time = (datetime.now() - start_time).total_seconds()

            # Determine if output is an image
            is_image = isinstance(answer_result, str) and answer_result.endswith(('.png', '.jpg', '.jpeg'))

            # Log successful interaction
            log_interaction(
                user_query=question,
                model_name=model_name,
                response_content=str(answer_result),
                generated_code=full_code,
                execution_time=execution_time,
                error_message=None,
                is_image=is_image
            )

            return {
                "role": "assistant",
                "content": answer_result,
                "gen_code": full_code,
                "ex_code": full_code,
                "last_prompt": question,
                "error": None
            }

        except Exception as code_error:
            error_msg = str(code_error)
            return _log_and_build_failure(
                question, model_name, start_time,
                log_content=f" Error executing generated code: {error_msg}",
                user_content=f"❌ Error executing generated code: {error_msg}",
                error=error_msg,
                generated_code=full_code,
            )

    except Exception as e:
        error_msg = str(e)

        # Handle specific API errors
        if "organization_restricted" in error_msg:
            response_content = "❌ API Organization Restricted: Your API key access has been restricted. Please check your Groq API key or try generating a new one."
            log_error_msg = "API access restricted"
        elif "rate_limit" in error_msg.lower():
            response_content = " Rate limit exceeded. Please wait a moment and try again."
            log_error_msg = "Rate limit exceeded"
        else:
            response_content = f"❌ Error: {error_msg}"
            log_error_msg = error_msg

        return _log_and_build_failure(
            question, model_name, start_time,
            log_content=response_content,
            user_content=response_content,
            error=log_error_msg,
        )
states_data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05df4f6a959d683b1fe1f978887d7af54b8d577a38a39b339d238709023466bf
3
+ size 1782