danielrosehill committed on
Commit
5d649a8
·
1 Parent(s): 5111641

Fix dataset loading and examples format issues

Browse files

- Enhanced dataset loading with fallback to individual configurations
- Fixed examples format for ChatInterface with additional inputs
- Added more robust error handling for dataset loading
- Improved sample dataset for testing when real data fails

Fixes runtime errors in HuggingFace Spaces deployment

Files changed (1) hide show
  1. app.py +49 -17
app.py CHANGED
@@ -17,23 +17,55 @@ class GVFDChatbot:
17
  def load_data(self):
18
  """Load the Global Value Factor Database from HuggingFace"""
19
  try:
20
- self.dataset = load_dataset(
21
- "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
22
- split='validation'
23
- )
24
- self.df = pd.DataFrame(self.dataset)
25
- print(f"Dataset loaded successfully with {len(self.df)} records")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  except Exception as e:
27
  print(f"Error loading dataset: {e}")
28
- # Create a sample dataset for testing
 
29
  self.df = pd.DataFrame({
30
- 'category': ['Air Pollution', 'Water Consumption', 'Waste Generation'] * 10,
31
- 'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste'] * 10,
32
- 'value_factor': [185.50, 125.75, 95.25] * 10,
33
- 'country': ['USA', 'Germany', 'Japan'] * 10,
34
- 'units': ['USD per ton CO2', 'USD per m3', 'USD per ton'] * 10
 
35
  })
36
- print("Using sample dataset for testing")
37
 
38
  def search_value_factors(self, query: str, category: str = "all") -> List[Dict]:
39
  """Search for value factors based on query and category"""
@@ -184,10 +216,10 @@ chatbot = gr.ChatInterface(
184
  title="🌍 GVFD Explorer",
185
  description="AI-powered exploration of the Global Value Factor Database. Search for environmental impact value factors, perform calculations, and get intelligent guidance.",
186
  examples=[
187
- "Value factor for CO2 emissions in Germany",
188
- "Find air pollution factors for USA",
189
- "Calculate impact for 100 tons with factor 185.50",
190
- "What's the water consumption factor in Japan?"
191
  ],
192
  additional_inputs=[
193
  gr.Textbox(
 
17
  def load_data(self):
18
  """Load the Global Value Factor Database from HuggingFace"""
19
  try:
20
+ # Try different approaches to load the dataset
21
+ print("Loading GVFD dataset...")
22
+
23
+ # Try the combined dataset first; fall back to individual configurations if it fails
24
+ try:
25
+ self.dataset = load_dataset(
26
+ "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
27
+ split='validation'
28
+ )
29
+ self.df = pd.DataFrame(self.dataset)
30
+ print(f"Dataset loaded successfully with {len(self.df)} records")
31
+ except:
32
+ # Fall back to loading each configuration separately and combining the results
33
+ configs = ["airpollution", "waterconsumption", "waste", "waterpollution"]
34
+ dfs = []
35
+ for config in configs:
36
+ try:
37
+ dataset = load_dataset(
38
+ "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
39
+ name=config,
40
+ split='validation'
41
+ )
42
+ df_part = pd.DataFrame(dataset)
43
+ if not df_part.empty:
44
+ dfs.append(df_part)
45
+ except Exception as config_error:
46
+ print(f"Failed to load {config}: {config_error}")
47
+ continue
48
+
49
+ if dfs:
50
+ # Combine all successfully loaded dataframes
51
+ self.df = pd.concat(dfs, ignore_index=True, sort=False)
52
+ print(f"Dataset loaded from {len(dfs)} configurations with {len(self.df)} total records")
53
+ else:
54
+ raise Exception("No configurations loaded successfully")
55
+
56
  except Exception as e:
57
  print(f"Error loading dataset: {e}")
58
+ print("Using sample dataset for testing")
59
+ # Create a more comprehensive sample dataset for testing
60
  self.df = pd.DataFrame({
61
+ 'category': ['Air Pollution', 'Water Consumption', 'Waste Generation', 'Land Use', 'Water Pollution'] * 20,
62
+ 'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste', 'Land Conversion', 'Water Contamination'] * 20,
63
+ 'value_factor': [185.50, 125.75, 95.25, 205.30, 167.80] * 20,
64
+ 'country': ['USA', 'Germany', 'Japan', 'Brazil', 'India'] * 20,
65
+ 'units': ['USD per ton CO2', 'USD per m3', 'USD per ton', 'USD per hectare', 'USD per m3'] * 20,
66
+ 'region': ['North America', 'Europe', 'Asia', 'South America', 'Asia'] * 20
67
  })
68
+ print(f"Sample dataset created with {len(self.df)} records")
69
 
70
  def search_value_factors(self, query: str, category: str = "all") -> List[Dict]:
71
  """Search for value factors based on query and category"""
 
216
  title="🌍 GVFD Explorer",
217
  description="AI-powered exploration of the Global Value Factor Database. Search for environmental impact value factors, perform calculations, and get intelligent guidance.",
218
  examples=[
219
+ ["Value factor for CO2 emissions in Germany", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
220
+ ["Find air pollution factors for USA", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
221
+ ["Calculate impact for 100 tons with factor 185.50", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
222
+ ["What's the water consumption factor in Japan?", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95]
223
  ],
224
  additional_inputs=[
225
  gr.Textbox(