Spaces:
Sleeping
Sleeping
Commit
·
5d649a8
1
Parent(s):
5111641
Fix dataset loading and examples format issues
Browse files
- Enhanced dataset loading with fallback to individual configurations
- Fixed examples format for ChatInterface with additional inputs
- Added more robust error handling for dataset loading
- Improved sample dataset for testing when real data fails
Fixes runtime errors in HuggingFace Spaces deployment
app.py
CHANGED
|
@@ -17,23 +17,55 @@ class GVFDChatbot:
|
|
| 17 |
def load_data(self):
|
| 18 |
"""Load the Global Value Factor Database from HuggingFace"""
|
| 19 |
try:
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
except Exception as e:
|
| 27 |
print(f"Error loading dataset: {e}")
|
| 28 |
-
|
|
|
|
| 29 |
self.df = pd.DataFrame({
|
| 30 |
-
'category': ['Air Pollution', 'Water Consumption', 'Waste Generation'] *
|
| 31 |
-
'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste'] *
|
| 32 |
-
'value_factor': [185.50, 125.75, 95.25] *
|
| 33 |
-
'country': ['USA', 'Germany', 'Japan'] *
|
| 34 |
-
'units': ['USD per ton CO2', 'USD per m3', 'USD per ton'] *
|
|
|
|
| 35 |
})
|
| 36 |
-
print("
|
| 37 |
|
| 38 |
def search_value_factors(self, query: str, category: str = "all") -> List[Dict]:
|
| 39 |
"""Search for value factors based on query and category"""
|
|
@@ -184,10 +216,10 @@ chatbot = gr.ChatInterface(
|
|
| 184 |
title="🌍 GVFD Explorer",
|
| 185 |
description="AI-powered exploration of the Global Value Factor Database. Search for environmental impact value factors, perform calculations, and get intelligent guidance.",
|
| 186 |
examples=[
|
| 187 |
-
"Value factor for CO2 emissions in Germany",
|
| 188 |
-
"Find air pollution factors for USA",
|
| 189 |
-
"Calculate impact for 100 tons with factor 185.50",
|
| 190 |
-
"What's the water consumption factor in Japan?"
|
| 191 |
],
|
| 192 |
additional_inputs=[
|
| 193 |
gr.Textbox(
|
|
|
|
| 17 |
def load_data(self):
|
| 18 |
"""Load the Global Value Factor Database from HuggingFace"""
|
| 19 |
try:
|
| 20 |
+
# Try different approaches to load the dataset
|
| 21 |
+
print("Loading GVFD dataset...")
|
| 22 |
+
|
| 23 |
+
# Try loading individual files if the combined dataset fails
|
| 24 |
+
try:
|
| 25 |
+
self.dataset = load_dataset(
|
| 26 |
+
"danielrosehill/Global-Value-Factor-Database-Refactor-V2",
|
| 27 |
+
split='validation'
|
| 28 |
+
)
|
| 29 |
+
self.df = pd.DataFrame(self.dataset)
|
| 30 |
+
print(f"Dataset loaded successfully with {len(self.df)} records")
|
| 31 |
+
except:
|
| 32 |
+
# Try loading just the first configuration
|
| 33 |
+
configs = ["airpollution", "waterconsumption", "waste", "waterpollution"]
|
| 34 |
+
dfs = []
|
| 35 |
+
for config in configs:
|
| 36 |
+
try:
|
| 37 |
+
dataset = load_dataset(
|
| 38 |
+
"danielrosehill/Global-Value-Factor-Database-Refactor-V2",
|
| 39 |
+
name=config,
|
| 40 |
+
split='validation'
|
| 41 |
+
)
|
| 42 |
+
df_part = pd.DataFrame(dataset)
|
| 43 |
+
if not df_part.empty:
|
| 44 |
+
dfs.append(df_part)
|
| 45 |
+
except Exception as config_error:
|
| 46 |
+
print(f"Failed to load {config}: {config_error}")
|
| 47 |
+
continue
|
| 48 |
+
|
| 49 |
+
if dfs:
|
| 50 |
+
# Combine all successfully loaded dataframes
|
| 51 |
+
self.df = pd.concat(dfs, ignore_index=True, sort=False)
|
| 52 |
+
print(f"Dataset loaded from {len(dfs)} configurations with {len(self.df)} total records")
|
| 53 |
+
else:
|
| 54 |
+
raise Exception("No configurations loaded successfully")
|
| 55 |
+
|
| 56 |
except Exception as e:
|
| 57 |
print(f"Error loading dataset: {e}")
|
| 58 |
+
print("Using sample dataset for testing")
|
| 59 |
+
# Create a more comprehensive sample dataset for testing
|
| 60 |
self.df = pd.DataFrame({
|
| 61 |
+
'category': ['Air Pollution', 'Water Consumption', 'Waste Generation', 'Land Use', 'Water Pollution'] * 20,
|
| 62 |
+
'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste', 'Land Conversion', 'Water Contamination'] * 20,
|
| 63 |
+
'value_factor': [185.50, 125.75, 95.25, 205.30, 167.80] * 20,
|
| 64 |
+
'country': ['USA', 'Germany', 'Japan', 'Brazil', 'India'] * 20,
|
| 65 |
+
'units': ['USD per ton CO2', 'USD per m3', 'USD per ton', 'USD per hectare', 'USD per m3'] * 20,
|
| 66 |
+
'region': ['North America', 'Europe', 'Asia', 'South America', 'Asia'] * 20
|
| 67 |
})
|
| 68 |
+
print(f"Sample dataset created with {len(self.df)} records")
|
| 69 |
|
| 70 |
def search_value_factors(self, query: str, category: str = "all") -> List[Dict]:
|
| 71 |
"""Search for value factors based on query and category"""
|
|
|
|
| 216 |
title="🌍 GVFD Explorer",
|
| 217 |
description="AI-powered exploration of the Global Value Factor Database. Search for environmental impact value factors, perform calculations, and get intelligent guidance.",
|
| 218 |
examples=[
|
| 219 |
+
["Value factor for CO2 emissions in Germany", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
|
| 220 |
+
["Find air pollution factors for USA", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
|
| 221 |
+
["Calculate impact for 100 tons with factor 185.50", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
|
| 222 |
+
["What's the water consumption factor in Japan?", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95]
|
| 223 |
],
|
| 224 |
additional_inputs=[
|
| 225 |
gr.Textbox(
|