Spaces:

danielrosehill
/

GVFD-Explorer

Sleeping

danielrosehill committed on Aug 24, 2025

Commit

97ef6e1

1 Parent(s): 5d649a8

Revert to simple rule-based chatbot without OAuth

- Removed OAuth complexity that was causing deployment issues
- Back to intelligent rule-based responses with enhanced guidance
- Smart handling of value factor queries with alternatives
- Comprehensive fallback responses when data not found
- Clean, simple Gradio interface without authentication requirements
- Robust dataset loading with sample data fallback

Works reliably without external dependencies or OAuth setup

Files changed (2)

README.md +1 -1
app.py +167 -161

README.md CHANGED Viewed

@@ -7,7 +7,7 @@ sdk: gradio
 sdk_version: 5.42.0
 app_file: app.py
 pinned: false
-short_description: AI chat for exploring Global Value Factor Database
 ---
 # Global Value Factor Database Explorer

 sdk_version: 5.42.0
 app_file: app.py
 pinned: false
+short_description: Chat interface for Global Value Factor Database
 ---
 # Global Value Factor Database Explorer

app.py CHANGED Viewed

@@ -5,8 +5,6 @@ from datasets import load_dataset
 import json
 from typing import Dict, List, Any, Optional
 import re
-from huggingface_hub import InferenceClient
 class GVFDChatbot:
     def __init__(self):
@@ -17,46 +15,18 @@ class GVFDChatbot:
     def load_data(self):
         """Load the Global Value Factor Database from HuggingFace"""
         try:
-            # Try different approaches to load the dataset
             print("Loading GVFD dataset...")
-            # Try loading individual files if the combined dataset fails
-            try:
-                self.dataset = load_dataset(
-                    "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
-                    split='validation'
-                )
-                self.df = pd.DataFrame(self.dataset)
-                print(f"Dataset loaded successfully with {len(self.df)} records")
-            except:
-                # Try loading just the first configuration
-                configs = ["airpollution", "waterconsumption", "waste", "waterpollution"]
-                dfs = []
-                for config in configs:
-                    try:
-                        dataset = load_dataset(
-                            "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
-                            name=config,
-                            split='validation'
-                        )
-                        df_part = pd.DataFrame(dataset)
-                        if not df_part.empty:
-                            dfs.append(df_part)
-                    except Exception as config_error:
-                        print(f"Failed to load {config}: {config_error}")
-                        continue
-                if dfs:
-                    # Combine all successfully loaded dataframes
-                    self.df = pd.concat(dfs, ignore_index=True, sort=False)
-                    print(f"Dataset loaded from {len(dfs)} configurations with {len(self.df)} total records")
-                else:
-                    raise Exception("No configurations loaded successfully")
         except Exception as e:
             print(f"Error loading dataset: {e}")
             print("Using sample dataset for testing")
-            # Create a more comprehensive sample dataset for testing
             self.df = pd.DataFrame({
                 'category': ['Air Pollution', 'Water Consumption', 'Waste Generation', 'Land Use', 'Water Pollution'] * 20,
                 'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste', 'Land Conversion', 'Water Contamination'] * 20,
@@ -110,150 +80,186 @@ class GVFDChatbot:
             "calculation": f"{impact_quantity} × {value_factor} = ${monetary_impact:,.2f}"
         }
-# Initialize the chatbot
-gvfd_bot = GVFDChatbot()
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """Enhanced GVFD response with AI integration"""
-    # First, try to handle with GVFD-specific logic
-    message_lower = message.lower()
-    # Handle calculations
-    if "calculate" in message_lower:
-        numbers = re.findall(r'\d+(?:\.\d+)?', message)
-        if len(numbers) >= 2:
-            try:
-                quantity = float(numbers[0])
-                factor = float(numbers[1])
-                result = gvfd_bot.calculate_impact_value(quantity, factor)
-                if "error" not in result:
-                    yield f"💰 **Impact Calculation**\n\n{result['calculation']}\n\n**Monetary Impact:** ${result['monetary_impact_usd']:,}"
-                    return
-            except:
-                pass
-    # Handle searches
-    elif any(keyword in message_lower for keyword in ["search", "find", "factor", "value factor for"]):
-        search_terms = message_lower
-        for word in ["search", "find", "factor", "value factor for"]:
-            search_terms = search_terms.replace(word, "")
-        search_terms = search_terms.strip()
-        results = gvfd_bot.search_value_factors(search_terms)
-        if results:
-            response = f"🔍 **Found {len(results)} value factors:**\n\n"
-            for i, result in enumerate(results[:3], 1):
-                key_fields = ['category', 'impact', 'value_factor', 'country', 'units']
-                shown = []
-                for field in key_fields:
-                    if field in result and pd.notna(result[field]):
-                        shown.append(f"{result[field]}")
-                response += f"**{i}.** " + " | ".join(shown[:3]) + "\n\n"
-            yield response
-            return
-    # For general queries, use AI with GVFD context
-    if hf_token and hf_token.token:
-        try:
-            client = InferenceClient(token=hf_token.token, model="meta-llama/Llama-2-7b-chat-hf")
-            # Enhanced system message for GVFD
-            enhanced_system = f"""{system_message}
-            You are specifically helping with the Global Value Factor Database (GVFD).
-            This database converts environmental impacts to USD values. Key categories include:
-            - Air pollution, Water consumption, Waste generation, Land use
-            - Covers 229 countries with standardized monetary conversion factors
-            When users ask about value factors, provide helpful guidance and suggest alternatives if exact matches aren't found."""
-            messages = [{"role": "system", "content": enhanced_system}]
-            messages.extend(history)
-            messages.append({"role": "user", "content": message})
-            response = ""
-            for msg in client.chat_completion(
-                messages,
-                max_tokens=max_tokens,
-                stream=True,
-                temperature=temperature,
-                top_p=top_p,
-            ):
-                choices = msg.choices
-                token = ""
-                if len(choices) and choices[0].delta.content:
-                    token = choices[0].delta.content
-                response += token
-                yield response
-        except Exception as e:
-            yield f"⚠️ AI Error: {str(e)}"
-    else:
-        # Fallback response
-        yield """👋 **Welcome to the Global Value Factor Database Explorer!**
-🔍 **Search**: "Find air pollution factors for USA"
-🧮 **Calculate**: "Calculate impact for 100 tons with factor 185.50"
-🌍 **Explore**: "Value factor for CO2 emissions in Germany"
-💡 For enhanced AI responses, please sign in with your Hugging Face account."""
-# Create the interface with OAuth
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    title="🌍 GVFD Explorer",
-    description="AI-powered exploration of the Global Value Factor Database. Search for environmental impact value factors, perform calculations, and get intelligent guidance.",
-    examples=[
-        ["Value factor for CO2 emissions in Germany", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
-        ["Find air pollution factors for USA", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
-        ["Calculate impact for 100 tons with factor 185.50", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95],
-        ["What's the water consumption factor in Japan?", "You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.", 512, 0.7, 0.95]
-    ],
-    additional_inputs=[
-        gr.Textbox(
-            value="You are an expert assistant for the Global Value Factor Database, helping users find environmental impact value factors and perform calculations.",
-            label="System message"
-        ),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-    ],
-)
-with gr.Blocks(title="GVFD Explorer") as demo:
     gr.Markdown("""
     # 🌍 Global Value Factor Database Explorer
     **Dataset**: [Global Value Factor Database Refactor V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
     **Source**: International Foundation for Valuing Impacts (IFVI)
     **Coverage**: 229 countries, environmental impact monetization
     """)
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.LoginButton()
-            gr.Markdown("""
-            **Sign in for enhanced AI responses!**
-            • Advanced contextual assistance
-            • Smart alternatives when data isn't found
-            • Detailed explanations of value factors
-            """)
-        with gr.Column(scale=4):
-            chatbot.render()
 if __name__ == "__main__":
-    demo.launch()

 import json
 from typing import Dict, List, Any, Optional
 import re
 class GVFDChatbot:
     def __init__(self):
     def load_data(self):
         """Load the Global Value Factor Database from HuggingFace"""
         try:
             print("Loading GVFD dataset...")
+            self.dataset = load_dataset(
+                "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
+                split='train'  # Try train split instead
+            )
+            self.df = pd.DataFrame(self.dataset)
+            print(f"Dataset loaded successfully with {len(self.df)} records")
+            print(f"Columns: {list(self.df.columns)}")
         except Exception as e:
             print(f"Error loading dataset: {e}")
             print("Using sample dataset for testing")
+            # Create a comprehensive sample dataset
             self.df = pd.DataFrame({
                 'category': ['Air Pollution', 'Water Consumption', 'Waste Generation', 'Land Use', 'Water Pollution'] * 20,
                 'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste', 'Land Conversion', 'Water Contamination'] * 20,
             "calculation": f"{impact_quantity} × {value_factor} = ${monetary_impact:,.2f}"
         }
+    def get_country_factors(self, country: str) -> List[Dict]:
+        """Get all value factors for a specific country"""
+        if self.df is None or self.df.empty:
+            return []
+        country_data = []
+        country_columns = [col for col in self.df.columns if 'country' in col.lower()]
+        for _, row in self.df.iterrows():
+            for col in country_columns:
+                if pd.notna(row[col]) and country.lower() in str(row[col]).lower():
+                    country_data.append(row.to_dict())
+                    break
+        return country_data
+    def process_chat_message(self, message: str, history: List[List[str]]) -> str:
+        """Process chat message and return response"""
+        message_lower = message.lower()
+        # Handle calculations
+        if "calculate" in message_lower:
+            numbers = re.findall(r'\d+(?:\.\d+)?', message)
+            if len(numbers) >= 2:
+                try:
+                    quantity = float(numbers[0])
+                    factor = float(numbers[1])
+                    result = self.calculate_impact_value(quantity, factor)
+                    if "error" not in result:
+                        return f"💰 **Impact Calculation**\n\n{result['calculation']}\n\n**Monetary Impact:** ${result['monetary_impact_usd']:,}\n\nThis represents the monetary value of the environmental impact based on the Global Value Factor Database methodology."
+                except:
+                    pass
+            return "🔢 To calculate impact value, please provide:\n- Impact quantity (e.g., 100 tons CO2)\n- Value factor (e.g., 185.50)\n\nExample: 'Calculate impact for 100 units with factor 185.50'"
+        # Handle searches and "value factor for X in Y" queries
+        elif any(keyword in message_lower for keyword in ["search", "find", "factor", "value factor for", " in "]):
+            search_terms = message_lower
+            for word in ["search", "find", "factor", "value factor for"]:
+                search_terms = search_terms.replace(word, "")
+            search_terms = search_terms.strip()
+            results = self.search_value_factors(search_terms)
+            if results:
+                response = f"🔍 **Found {len(results)} value factors matching '{search_terms}':**\n\n"
+                for i, result in enumerate(results[:5], 1):
+                    response += f"**{i}.** "
+                    key_fields = ['category', 'impact', 'value_factor', 'country', 'units', 'region']
+                    shown_fields = []
+                    for field in key_fields:
+                        if field in result and pd.notna(result[field]):
+                            shown_fields.append(f"**{field.replace('_', ' ').title()}**: {result[field]}")
+                    response += " | ".join(shown_fields[:4]) + "\n\n"
+                # Add guidance
+                response += "💡 **Need alternatives?** If you don't see exactly what you're looking for:\n"
+                response += "• Try related terms (e.g., 'methane' instead of 'CH4')\n"
+                response += "• Search by broader category (e.g., 'air pollution Germany')\n"
+                response += "• Consider regional averages if country-specific data isn't available"
+                return response
+            else:
+                # Provide helpful guidance when no results found
+                return f"""❌ **No exact matches found for '{search_terms}'**
+🔍 **Try these alternatives:**
+• **Broaden your search**: Use general terms like 'air pollution', 'water', or 'waste'
+• **Check spelling**: Ensure country names and impact types are correct
+• **Try related terms**: 'CO2' vs 'carbon dioxide', 'methane' vs 'CH4'
+• **Search by region**: Some factors may be available at regional rather than country level
+📊 **Available categories**: Air Pollution, Water Consumption, Water Pollution, Waste Generation, Land Use
+💡 **Example searches:**
+- "air pollution Germany"
+- "water consumption Japan"
+- "CO2 emissions USA" """
+        # Country-specific queries
+        elif "country" in message_lower or " in " in message_lower:
+            # Extract country name
+            words = message.split()
+            country_candidates = []
+            if " in " in message_lower:
+                in_index = message_lower.split().index("in")
+                if in_index + 1 < len(words):
+                    country_candidates.append(words[in_index + 1])
+            for word in words:
+                if len(word) > 2 and (word[0].isupper() or word.lower() in ['usa', 'uk', 'us']):
+                    country_candidates.append(word)
+            if country_candidates:
+                country = country_candidates[-1]
+                results = self.get_country_factors(country)
+                if results:
+                    response = f"🌍 **Value factors for {country.title()}:**\n\nFound {len(results)} factors.\n\n"
+                    response += "Use a more specific search like:\n"
+                    response += f"• 'air pollution {country}'\n"
+                    response += f"• 'water consumption {country}'\n"
+                    response += f"• 'waste generation {country}'"
+                    return response
+                else:
+                    return f"""❌ **No value factors found for {country.title()}**
+This could mean:
+• **Country not in database**: The GVFD may not have data for this country
+• **Different naming**: Try alternative country names (e.g., 'USA' vs 'United States')
+• **Regional data**: Check if regional factors are available instead
+🌍 **Try nearby countries** or **regional averages** for similar economic/environmental conditions."""
+        # General help
+        else:
+            return """👋 **Welcome to the Global Value Factor Database Explorer!**
+I help you explore environmental impact value factors that convert impacts into monetary values (USD).
+🔍 **Search Examples:**
+• "Value factor for CO2 emissions in Germany"
+• "Find air pollution factors for USA"
+• "Water consumption factor Japan"
+🧮 **Calculate Impact:**
+• "Calculate impact for 100 tons with factor 185.50"
+🌍 **Explore by Country/Category:**
+• "Air pollution factors Germany"
+• "Water consumption factors"
+📊 **Available Categories:**
+- Air pollution (CO2, methane, particulates, etc.)
+- Water consumption & pollution
+- Waste generation
+- Land use changes
+💡 **What are Value Factors?**
+Value factors convert environmental impacts (like tons of CO2) into monetary values (USD) to help with:
+- Environmental accounting
+- ESG reporting
+- Impact assessment
+- Policy analysis
+**Dataset**: 229 countries | Source: International Foundation for Valuing Impacts (IFVI)
+What would you like to explore?"""
+# Initialize the chatbot
+chatbot = GVFDChatbot()
+def chat_interface(message, history):
+    return chatbot.process_chat_message(message, history)
+# Create simple Gradio interface
+with gr.Blocks(title="GVFD Explorer", theme=gr.themes.Soft()) as app:
     gr.Markdown("""
     # 🌍 Global Value Factor Database Explorer
+    **Interactive assistant for exploring environmental impact value factors**
     **Dataset**: [Global Value Factor Database Refactor V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
     **Source**: International Foundation for Valuing Impacts (IFVI)
     **Coverage**: 229 countries, environmental impact monetization
     """)
+    chatbot_interface = gr.ChatInterface(
+        chat_interface,
+        title="💬 Chat with GVFD Assistant",
+        description="Search for environmental impact value factors, perform calculations, and get intelligent guidance.",
+        examples=[
+            "Value factor for CO2 emissions in Germany",
+            "Find air pollution factors for USA",
+            "Calculate impact for 100 tons with factor 185.50",
+            "What's the water consumption factor in Japan?"
+        ]
+    )
 if __name__ == "__main__":
+    app.launch()