danielrosehill committed on
Commit
97ef6e1
·
1 Parent(s): 5d649a8

Revert to simple rule-based chatbot without OAuth

Browse files

- Removed OAuth complexity that was causing deployment issues
- Back to intelligent rule-based responses with enhanced guidance
- Smart handling of value factor queries with alternatives
- Comprehensive fallback responses when data not found
- Clean, simple Gradio interface without authentication requirements
- Robust dataset loading with sample data fallback

Works reliably without external dependencies or OAuth setup

Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +167 -161
README.md CHANGED
@@ -7,7 +7,7 @@ sdk: gradio
7
  sdk_version: 5.42.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: AI chat for exploring Global Value Factor Database
11
  ---
12
 
13
  # Global Value Factor Database Explorer
 
7
  sdk_version: 5.42.0
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Chat interface for Global Value Factor Database
11
  ---
12
 
13
  # Global Value Factor Database Explorer
app.py CHANGED
@@ -5,8 +5,6 @@ from datasets import load_dataset
5
  import json
6
  from typing import Dict, List, Any, Optional
7
  import re
8
- from huggingface_hub import InferenceClient
9
-
10
 
11
  class GVFDChatbot:
12
  def __init__(self):
@@ -17,46 +15,18 @@ class GVFDChatbot:
17
  def load_data(self):
18
  """Load the Global Value Factor Database from HuggingFace"""
19
  try:
20
- # Try different approaches to load the dataset
21
  print("Loading GVFD dataset...")
22
-
23
- # Try loading individual files if the combined dataset fails
24
- try:
25
- self.dataset = load_dataset(
26
- "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
27
- split='validation'
28
- )
29
- self.df = pd.DataFrame(self.dataset)
30
- print(f"Dataset loaded successfully with {len(self.df)} records")
31
- except:
32
- # Try loading just the first configuration
33
- configs = ["airpollution", "waterconsumption", "waste", "waterpollution"]
34
- dfs = []
35
- for config in configs:
36
- try:
37
- dataset = load_dataset(
38
- "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
39
- name=config,
40
- split='validation'
41
- )
42
- df_part = pd.DataFrame(dataset)
43
- if not df_part.empty:
44
- dfs.append(df_part)
45
- except Exception as config_error:
46
- print(f"Failed to load {config}: {config_error}")
47
- continue
48
-
49
- if dfs:
50
- # Combine all successfully loaded dataframes
51
- self.df = pd.concat(dfs, ignore_index=True, sort=False)
52
- print(f"Dataset loaded from {len(dfs)} configurations with {len(self.df)} total records")
53
- else:
54
- raise Exception("No configurations loaded successfully")
55
-
56
  except Exception as e:
57
  print(f"Error loading dataset: {e}")
58
  print("Using sample dataset for testing")
59
- # Create a more comprehensive sample dataset for testing
60
  self.df = pd.DataFrame({
61
  'category': ['Air Pollution', 'Water Consumption', 'Waste Generation', 'Land Use', 'Water Pollution'] * 20,
62
  'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste', 'Land Conversion', 'Water Contamination'] * 20,
@@ -110,150 +80,186 @@ class GVFDChatbot:
110
  "calculation": f"{impact_quantity} × {value_factor} = ${monetary_impact:,.2f}"
111
  }
112
 
113
-
114
- # Initialize the chatbot
115
- gvfd_bot = GVFDChatbot()
116
-
117
- def respond(
118
- message,
119
- history: list[dict[str, str]],
120
- system_message,
121
- max_tokens,
122
- temperature,
123
- top_p,
124
- hf_token: gr.OAuthToken,
125
- ):
126
- """Enhanced GVFD response with AI integration"""
127
- # First, try to handle with GVFD-specific logic
128
- message_lower = message.lower()
129
-
130
- # Handle calculations
131
- if "calculate" in message_lower:
132
- numbers = re.findall(r'\d+(?:\.\d+)?', message)
133
- if len(numbers) >= 2:
134
- try:
135
- quantity = float(numbers[0])
136
- factor = float(numbers[1])
137
- result = gvfd_bot.calculate_impact_value(quantity, factor)
138
- if "error" not in result:
139
- yield f"💰 **Impact Calculation**\n\n{result['calculation']}\n\n**Monetary Impact:** ${result['monetary_impact_usd']:,}"
140
- return
141
- except:
142
- pass
143
-
144
- # Handle searches
145
- elif any(keyword in message_lower for keyword in ["search", "find", "factor", "value factor for"]):
146
- search_terms = message_lower
147
- for word in ["search", "find", "factor", "value factor for"]:
148
- search_terms = search_terms.replace(word, "")
149
- search_terms = search_terms.strip()
150
 
151
- results = gvfd_bot.search_value_factors(search_terms)
 
 
 
 
152
 
153
- if results:
154
- response = f"🔍 **Found {len(results)} value factors:**\n\n"
155
- for i, result in enumerate(results[:3], 1):
156
- key_fields = ['category', 'impact', 'value_factor', 'country', 'units']
157
- shown = []
158
- for field in key_fields:
159
- if field in result and pd.notna(result[field]):
160
- shown.append(f"{result[field]}")
161
- response += f"**{i}.** " + " | ".join(shown[:3]) + "\n\n"
162
- yield response
163
- return
164
 
165
- # For general queries, use AI with GVFD context
166
- if hf_token and hf_token.token:
167
- try:
168
- client = InferenceClient(token=hf_token.token, model="meta-llama/Llama-2-7b-chat-hf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
- # Enhanced system message for GVFD
171
- enhanced_system = f"""{system_message}
172
 
173
- You are specifically helping with the Global Value Factor Database (GVFD).
174
- This database converts environmental impacts to USD values. Key categories include:
175
- - Air pollution, Water consumption, Waste generation, Land use
176
- - Covers 229 countries with standardized monetary conversion factors
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- When users ask about value factors, provide helpful guidance and suggest alternatives if exact matches aren't found."""
 
 
 
179
 
180
- messages = [{"role": "system", "content": enhanced_system}]
181
- messages.extend(history)
182
- messages.append({"role": "user", "content": message})
183
 
184
- response = ""
185
- for msg in client.chat_completion(
186
- messages,
187
- max_tokens=max_tokens,
188
- stream=True,
189
- temperature=temperature,
190
- top_p=top_p,
191
- ):
192
- choices = msg.choices
193
- token = ""
194
- if len(choices) and choices[0].delta.content:
195
- token = choices[0].delta.content
196
- response += token
197
- yield response
198
 
199
- except Exception as e:
200
- yield f"⚠️ AI Error: {str(e)}"
201
- else:
202
- # Fallback response
203
- yield """👋 **Welcome to the Global Value Factor Database Explorer!**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
- ๐Ÿ” **Search**: "Find air pollution factors for USA"
206
- ๐Ÿงฎ **Calculate**: "Calculate impact for 100 tons with factor 185.50"
207
- ๐ŸŒ **Explore**: "Value factor for CO2 emissions in Germany"
 
208
 
209
- 💡 For enhanced AI responses, please sign in with your Hugging Face account."""
 
210
 
 
 
 
211
 
212
- # Create the interface with OAuth
213
- chatbot = gr.ChatInterface(
214
- respond,
215
- type="messages",
216
- title="🌍 GVFD Explorer",
217
- description="AI-powered exploration of the Global Value Factor Database. Search for environmental impact value factors, perform calculations, and get intelligent guidance.",
218
- examples=[
219
- ["Value factor for CO2 emissions in Germany", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
220
- ["Find air pollution factors for USA", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
221
- ["Calculate impact for 100 tons with factor 185.50", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
222
- ["What's the water consumption factor in Japan?", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95]
223
- ],
224
- additional_inputs=[
225
- gr.Textbox(
226
- value="You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.",
227
- label="System message"
228
- ),
229
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
230
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
231
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
232
- ],
233
- )
234
 
235
- with gr.Blocks(title="GVFD Explorer") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  gr.Markdown("""
237
  # ๐ŸŒ Global Value Factor Database Explorer
238
 
 
 
239
  **Dataset**: [Global Value Factor Database Refactor V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
240
  **Source**: International Foundation for Valuing Impacts (IFVI)
241
  **Coverage**: 229 countries, environmental impact monetization
242
  """)
243
 
244
- with gr.Row():
245
- with gr.Column(scale=1):
246
- gr.LoginButton()
247
- gr.Markdown("""
248
- **Sign in for enhanced AI responses!**
249
-
250
- • Advanced contextual assistance
251
- • Smart alternatives when data isn't found
252
- • Detailed explanations of value factors
253
- """)
254
-
255
- with gr.Column(scale=4):
256
- chatbot.render()
257
 
258
  if __name__ == "__main__":
259
- demo.launch()
 
5
  import json
6
  from typing import Dict, List, Any, Optional
7
  import re
 
 
8
 
9
  class GVFDChatbot:
10
  def __init__(self):
 
15
  def load_data(self):
16
  """Load the Global Value Factor Database from HuggingFace"""
17
  try:
 
18
  print("Loading GVFD dataset...")
19
+ self.dataset = load_dataset(
20
+ "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
21
+ split='train' # Try train split instead
22
+ )
23
+ self.df = pd.DataFrame(self.dataset)
24
+ print(f"Dataset loaded successfully with {len(self.df)} records")
25
+ print(f"Columns: {list(self.df.columns)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  except Exception as e:
27
  print(f"Error loading dataset: {e}")
28
  print("Using sample dataset for testing")
29
+ # Create a comprehensive sample dataset
30
  self.df = pd.DataFrame({
31
  'category': ['Air Pollution', 'Water Consumption', 'Waste Generation', 'Land Use', 'Water Pollution'] * 20,
32
  'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste', 'Land Conversion', 'Water Contamination'] * 20,
 
80
  "calculation": f"{impact_quantity} × {value_factor} = ${monetary_impact:,.2f}"
81
  }
82
 
83
+ def get_country_factors(self, country: str) -> List[Dict]:
84
+ """Get all value factors for a specific country"""
85
+ if self.df is None or self.df.empty:
86
+ return []
87
+
88
+ country_data = []
89
+ country_columns = [col for col in self.df.columns if 'country' in col.lower()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
+ for _, row in self.df.iterrows():
92
+ for col in country_columns:
93
+ if pd.notna(row[col]) and country.lower() in str(row[col]).lower():
94
+ country_data.append(row.to_dict())
95
+ break
96
 
97
+ return country_data
 
 
 
 
 
 
 
 
 
 
98
 
99
+ def process_chat_message(self, message: str, history: List[List[str]]) -> str:
100
+ """Process chat message and return response"""
101
+ message_lower = message.lower()
102
+
103
+ # Handle calculations
104
+ if "calculate" in message_lower:
105
+ numbers = re.findall(r'\d+(?:\.\d+)?', message)
106
+ if len(numbers) >= 2:
107
+ try:
108
+ quantity = float(numbers[0])
109
+ factor = float(numbers[1])
110
+ result = self.calculate_impact_value(quantity, factor)
111
+ if "error" not in result:
112
+ return f"💰 **Impact Calculation**\n\n{result['calculation']}\n\n**Monetary Impact:** ${result['monetary_impact_usd']:,}\n\nThis represents the monetary value of the environmental impact based on the Global Value Factor Database methodology."
113
+ except:
114
+ pass
115
+ return "🔢 To calculate impact value, please provide:\n- Impact quantity (e.g., 100 tons CO2)\n- Value factor (e.g., 185.50)\n\nExample: 'Calculate impact for 100 units with factor 185.50'"
116
+
117
+ # Handle searches and "value factor for X in Y" queries
118
+ elif any(keyword in message_lower for keyword in ["search", "find", "factor", "value factor for", " in "]):
119
+ search_terms = message_lower
120
+ for word in ["search", "find", "factor", "value factor for"]:
121
+ search_terms = search_terms.replace(word, "")
122
+ search_terms = search_terms.strip()
123
 
124
+ results = self.search_value_factors(search_terms)
 
125
 
126
+ if results:
127
+ response = f"🔍 **Found {len(results)} value factors matching '{search_terms}':**\n\n"
128
+ for i, result in enumerate(results[:5], 1):
129
+ response += f"**{i}.** "
130
+ key_fields = ['category', 'impact', 'value_factor', 'country', 'units', 'region']
131
+ shown_fields = []
132
+
133
+ for field in key_fields:
134
+ if field in result and pd.notna(result[field]):
135
+ shown_fields.append(f"**{field.replace('_', ' ').title()}**: {result[field]}")
136
+
137
+ response += " | ".join(shown_fields[:4]) + "\n\n"
138
+
139
+ # Add guidance
140
+ response += "💡 **Need alternatives?** If you don't see exactly what you're looking for:\n"
141
+ response += "• Try related terms (e.g., 'methane' instead of 'CH4')\n"
142
+ response += "• Search by broader category (e.g., 'air pollution Germany')\n"
143
+ response += "• Consider regional averages if country-specific data isn't available"
144
+
145
+ return response
146
+ else:
147
+ # Provide helpful guidance when no results found
148
+ return f"""❌ **No exact matches found for '{search_terms}'**
149
+
150
+ ๐Ÿ” **Try these alternatives:**
151
+ โ€ข **Broaden your search**: Use general terms like 'air pollution', 'water', or 'waste'
152
+ โ€ข **Check spelling**: Ensure country names and impact types are correct
153
+ โ€ข **Try related terms**: 'CO2' vs 'carbon dioxide', 'methane' vs 'CH4'
154
+ โ€ข **Search by region**: Some factors may be available at regional rather than country level
155
+
156
+ 📊 **Available categories**: Air Pollution, Water Consumption, Water Pollution, Waste Generation, Land Use
157
+
158
+ 💡 **Example searches:**
159
+ - "air pollution Germany"
160
+ - "water consumption Japan"
161
+ - "CO2 emissions USA" """
162
+
163
+ # Country-specific queries
164
+ elif "country" in message_lower or " in " in message_lower:
165
+ # Extract country name
166
+ words = message.split()
167
+ country_candidates = []
168
 
169
+ if " in " in message_lower:
170
+ in_index = message_lower.split().index("in")
171
+ if in_index + 1 < len(words):
172
+ country_candidates.append(words[in_index + 1])
173
 
174
+ for word in words:
175
+ if len(word) > 2 and (word[0].isupper() or word.lower() in ['usa', 'uk', 'us']):
176
+ country_candidates.append(word)
177
 
178
+ if country_candidates:
179
+ country = country_candidates[-1]
180
+ results = self.get_country_factors(country)
 
 
 
 
 
 
 
 
 
 
 
181
 
182
+ if results:
183
+ response = f"🌍 **Value factors for {country.title()}:**\n\nFound {len(results)} factors.\n\n"
184
+ response += "Use a more specific search like:\n"
185
+ response += f"• 'air pollution {country}'\n"
186
+ response += f"• 'water consumption {country}'\n"
187
+ response += f"• 'waste generation {country}'"
188
+ return response
189
+ else:
190
+ return f"""❌ **No value factors found for {country.title()}**
191
+
192
+ This could mean:
193
+ • **Country not in database**: The GVFD may not have data for this country
194
+ • **Different naming**: Try alternative country names (e.g., 'USA' vs 'United States')
195
+ • **Regional data**: Check if regional factors are available instead
196
+
197
+ ๐ŸŒ **Try nearby countries** or **regional averages** for similar economic/environmental conditions."""
198
+
199
+ # General help
200
+ else:
201
+ return """👋 **Welcome to the Global Value Factor Database Explorer!**
202
+
203
+ I help you explore environmental impact value factors that convert impacts into monetary values (USD).
204
 
205
+ ๐Ÿ” **Search Examples:**
206
+ โ€ข "Value factor for CO2 emissions in Germany"
207
+ โ€ข "Find air pollution factors for USA"
208
+ โ€ข "Water consumption factor Japan"
209
 
210
+ 🧮 **Calculate Impact:**
211
+ • "Calculate impact for 100 tons with factor 185.50"
212
 
213
+ ๐ŸŒ **Explore by Country/Category:**
214
+ โ€ข "Air pollution factors Germany"
215
+ โ€ข "Water consumption factors"
216
 
217
+ 📊 **Available Categories:**
218
+ - Air pollution (CO2, methane, particulates, etc.)
219
+ - Water consumption & pollution
220
+ - Waste generation
221
+ - Land use changes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
+ 💡 **What are Value Factors?**
224
+ Value factors convert environmental impacts (like tons of CO2) into monetary values (USD) to help with:
225
+ - Environmental accounting
226
+ - ESG reporting
227
+ - Impact assessment
228
+ - Policy analysis
229
+
230
+ **Dataset**: 229 countries | Source: International Foundation for Valuing Impacts (IFVI)
231
+
232
+ What would you like to explore?"""
233
+
234
+ # Initialize the chatbot
235
+ chatbot = GVFDChatbot()
236
+
237
+ def chat_interface(message, history):
238
+ return chatbot.process_chat_message(message, history)
239
+
240
+ # Create simple Gradio interface
241
+ with gr.Blocks(title="GVFD Explorer", theme=gr.themes.Soft()) as app:
242
  gr.Markdown("""
243
  # ๐ŸŒ Global Value Factor Database Explorer
244
 
245
+ **Interactive assistant for exploring environmental impact value factors**
246
+
247
  **Dataset**: [Global Value Factor Database Refactor V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
248
  **Source**: International Foundation for Valuing Impacts (IFVI)
249
  **Coverage**: 229 countries, environmental impact monetization
250
  """)
251
 
252
+ chatbot_interface = gr.ChatInterface(
253
+ chat_interface,
254
+ title="💬 Chat with GVFD Assistant",
255
+ description="Search for environmental impact value factors, perform calculations, and get intelligent guidance.",
256
+ examples=[
257
+ "Value factor for CO2 emissions in Germany",
258
+ "Find air pollution factors for USA",
259
+ "Calculate impact for 100 tons with factor 185.50",
260
+ "What's the water consumption factor in Japan?"
261
+ ]
262
+ )
 
 
263
 
264
  if __name__ == "__main__":
265
+ app.launch()