Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from datasets import load_dataset | |
| import json | |
| from typing import Dict, List, Any, Optional | |
| import re | |
| from transformers import pipeline | |
| import torch | |
class GVFDChatbot:
    """Chat assistant over the Global Value Factor Database (GVFD).

    On construction the dataset is loaded into ``self.df`` (falling back to a
    small built-in sample when loading fails) and a local text-generation
    pipeline is loaded into ``self.llm`` (``None`` when that fails). Replies
    use the LLM when one is available and fall back to rule-based text
    otherwise.
    """

    # Fields rendered, in this order, when a search hit is shown on one line.
    _KEY_FIELDS = ('category', 'impact', 'value_factor', 'country', 'units')
    # Only the first few non-null key fields are displayed per hit.
    _MAX_FIELDS_SHOWN = 4

    # Substrings that route a message to the search branch.
    _SEARCH_KEYWORDS = ("search", "find", "factor", "value factor for", " in ", "retrieve")

    # Command words removed from a message before searching. Longer phrases
    # come first so "value factor for" is consumed whole instead of being
    # broken up by the shorter "factor" alternative; word boundaries keep
    # "get" from eating the middle of unrelated words.
    _COMMAND_RE = re.compile(
        r"\b(?:value\s+factors?\s+for|value\s+factors?|show\s+me"
        r"|search|find|retrieve|factors?|get)\b"
    )
    # Connector words that carry no search meaning on their own.
    _STOP_WORDS = frozenset({"a", "an", "and", "for", "in", "of", "on", "the", "to", "with"})

    # A number that does not start inside a word or after a decimal point, so
    # the "2" in "CO2" is not mistaken for a quantity.
    _NUMBER_RE = re.compile(r"(?<![\w.])\d+(?:\.\d+)?")

    _SYSTEM_PROMPT = (
        "You are an expert assistant for the Global Value Factor Database (GVFD).\n"
        "You help users find environmental impact value factors that convert impacts to USD values.\n"
        "Key guidance:\n"
        '- When users ask for "value factor for X in Y country", provide what was found\n'
        "- If no exact match, suggest similar factors, related categories, or nearby countries\n"
        "- Explain what value factors are and why they vary by location\n"
        "- Guide users to alternatives when specific data isn't available\n"
        "- Be helpful and educational about environmental impact monetization"
    )

    _WELCOME_MESSAGE = (
        '๐ **Welcome to the GVFD Explorer!**\n'
        '๐ค **AI-Enhanced responses** - Ask me about value factors!\n'
        '๐ **Try**: "Value factor for CO2 emissions in Germany"\n'
        '๐งฎ **Calculate**: "Calculate impact for 100 tons with factor 185.50"\n'
        '๐ **Explore**: "What are value factors?"\n'
        '**Dataset**: 229 countries | **Source**: IFVI | **Status**: โ AI-powered'
    )

    def __init__(self):
        self.dataset = None
        self.df = None
        self.llm = None
        self.load_data()
        self.load_llm()

    def load_data(self):
        """Load the Global Value Factor Database from HuggingFace.

        On any failure the error is printed and ``self.df`` is filled with a
        100-row sample frame so the rest of the app keeps working.
        """
        try:
            print("Loading GVFD dataset...")
            self.dataset = load_dataset(
                "danielrosehill/Global-Value-Factor-Database-Refactor-V2",
                split='train',
            )
            self.df = pd.DataFrame(self.dataset)
            print(f"Dataset loaded successfully with {len(self.df)} records")
            print(f"Columns: {list(self.df.columns)}")
        except Exception as e:
            # Best-effort boundary: any loading problem degrades to sample data.
            print(f"Error loading dataset: {e}")
            print("Using sample dataset for testing")
            self.df = pd.DataFrame({
                'category': ['Air Pollution', 'Water Consumption', 'Waste Generation', 'Land Use', 'Water Pollution'] * 20,
                'impact': ['CO2 Emissions', 'Water Usage', 'Solid Waste', 'Land Conversion', 'Water Contamination'] * 20,
                'value_factor': [185.50, 125.75, 95.25, 205.30, 167.80] * 20,
                'country': ['USA', 'Germany', 'Japan', 'Brazil', 'India'] * 20,
                'units': ['USD per ton CO2', 'USD per m3', 'USD per ton', 'USD per hectare', 'USD per m3'] * 20,
                'region': ['North America', 'Europe', 'Asia', 'South America', 'Asia'] * 20,
            })
            print(f"Sample dataset created with {len(self.df)} records")

    def load_llm(self):
        """Load the local text-generation pipeline used for enhanced replies.

        Leaves ``self.llm`` as ``None`` when loading fails, which switches
        every reply to the rule-based fallbacks.
        """
        try:
            print("๐ค Loading local LLM for enhanced responses...")
            self.llm = pipeline(
                "text-generation",
                model="microsoft/DialoGPT-small",
                tokenizer="microsoft/DialoGPT-small",
                device_map="auto" if torch.cuda.is_available() else "cpu",
                # NOTE(review): 50256 is presumably the model's end-of-text
                # id reused as padding - confirm against the tokenizer.
                pad_token_id=50256,
            )
            print("โ LLM loaded successfully - completely FREE!")
        except Exception as e:
            print(f"โ ๏ธ LLM loading failed: {e}")
            print("Falling back to rule-based responses")
            self.llm = None

    def search_value_factors(self, query: str, category: str = "all") -> List[Dict]:
        """Search the text columns for the words of ``query``.

        Args:
            query: Free text. It is lower-cased and split on whitespace; each
                distinct word is matched as a substring of every non-null
                text cell. A blank query is matched as-is, so an empty string
                matches every text cell.
            category: ``"all"`` (default) or a substring the ``category``
                column must contain (case-insensitive, literal match).

        Returns:
            Up to 10 row dicts, each with an added ``match_score`` (number of
            word/cell hits), sorted by descending score.
        """
        if self.df is None or self.df.empty:
            return []

        frame = self.df
        if category != "all" and 'category' in frame.columns:
            # regex=False: the category is user text, not a pattern.
            in_category = frame['category'].str.lower().str.contains(
                category.lower(), na=False, regex=False
            )
            frame = frame[in_category]

        query_lower = query.lower()
        # Distinct words in first-seen order; fall back to the raw query when
        # it contains no words at all.
        terms = list(dict.fromkeys(query_lower.split())) or [query_lower]

        # Accept both object columns and dedicated string dtypes.
        text_columns = [
            col for col in frame.columns
            if pd.api.types.is_object_dtype(frame[col])
            or pd.api.types.is_string_dtype(frame[col])
        ]

        results = []
        for _, row in frame.iterrows():
            cells = [str(row[col]).lower() for col in text_columns if pd.notna(row[col])]
            match_score = sum(term in cell for term in terms for cell in cells)
            if match_score > 0:
                result = row.to_dict()
                result['match_score'] = match_score
                results.append(result)

        results.sort(key=lambda hit: hit['match_score'], reverse=True)
        return results[:10]

    def calculate_impact_value(self, impact_quantity: float, value_factor: float, country: str = "") -> Dict:
        """Multiply an impact quantity by a value factor.

        Returns:
            ``{"error": ...}`` when either number is NaN/None; otherwise a
            dict with the inputs, the product rounded to 2 decimals under
            ``monetary_impact_usd``, and a human-readable ``calculation``
            string that already includes the formatted result.
        """
        if pd.isna(impact_quantity) or pd.isna(value_factor):
            return {"error": "Invalid input values"}
        monetary_impact = impact_quantity * value_factor
        return {
            "impact_quantity": impact_quantity,
            "value_factor": value_factor,
            "monetary_impact_usd": round(monetary_impact, 2),
            "country": country,
            "calculation": f"{impact_quantity} ร {value_factor} = ${monetary_impact:,.2f}",
        }

    def get_country_factors(self, country: str) -> List[Dict]:
        """Return every row whose country-like column contains ``country``.

        A column counts as country-like when its name contains "country";
        matching is a case-insensitive substring test on non-null cells.
        """
        if self.df is None or self.df.empty:
            return []
        needle = country.lower()
        country_columns = [col for col in self.df.columns if 'country' in col.lower()]
        return [
            row.to_dict()
            for _, row in self.df.iterrows()
            if any(
                pd.notna(row[col]) and needle in str(row[col]).lower()
                for col in country_columns
            )
        ]

    def generate_llm_response(self, message: str, data_context: str = "") -> Optional[str]:
        """Ask the local LLM for a reply, optionally grounded in search data.

        Args:
            message: The user's message.
            data_context: Optional text describing search/calculation results
                to include in the prompt.

        Returns:
            The formatted assistant reply, or ``None`` when no LLM is loaded,
            generation fails, or the model produced no text - callers treat
            ``None`` as "use the rule-based fallback".
        """
        if not self.llm:
            return None

        sections = [self._SYSTEM_PROMPT]
        if data_context:
            sections.append(f"Search results: {data_context}")
        sections.append(f"User: {message}\nAssistant:")
        prompt = "\n\n".join(sections)

        try:
            response = self.llm(
                prompt,
                # Cap the reply itself. A max_length derived from the prompt's
                # character count mixes characters with tokens.
                max_new_tokens=150,
                temperature=0.7,
                do_sample=True,
                pad_token_id=50256,
            )
            full_text = response[0]['generated_text']
        except Exception as e:
            # Best-effort boundary: log and let the caller fall back.
            print(f"LLM generation error: {e}")
            return None

        # The generated text echoes the prompt; keep what follows the last
        # "Assistant:" marker and drop any hallucinated "User:" labels.
        assistant_response = full_text.split("Assistant:")[-1]
        assistant_response = assistant_response.replace("User:", "").strip()
        if not assistant_response:
            return None
        return f"๐ค **AI Assistant**: {assistant_response}"

    @classmethod
    def _format_result(cls, result: Dict) -> str:
        """Render the leading non-null key fields of a hit as ``a | b | c``."""
        shown = [
            f"{result[field]}"
            for field in cls._KEY_FIELDS
            if field in result and pd.notna(result[field])
        ]
        return " | ".join(shown[:cls._MAX_FIELDS_SHOWN])

    @classmethod
    def _extract_search_terms(cls, message_lower: str) -> str:
        """Strip command phrases and connector words from a lower-cased message."""
        stripped = cls._COMMAND_RE.sub(" ", message_lower)
        words = [word for word in re.findall(r"\w+", stripped) if word not in cls._STOP_WORDS]
        return " ".join(words)

    def _handle_calculation(self, message: str) -> Optional[str]:
        """Answer a "calculate" message, or return ``None`` if it has < 2 numbers."""
        numbers = self._NUMBER_RE.findall(message)
        if len(numbers) < 2:
            return None
        result = self.calculate_impact_value(float(numbers[0]), float(numbers[1]))
        if "error" in result:
            return None

        # ``calculation`` already ends with "= $<amount>", so it is shown
        # as-is rather than appending the amount a second time.
        calculation = result['calculation']
        llm_response = self.generate_llm_response(message, f"Calculation: {calculation}")
        if llm_response:
            return llm_response + f"\n\n๐ **Quick Reference:** {calculation}"
        return (
            f"๐ฐ **Impact Calculation**\n\n{calculation}\n\n"
            f"**Monetary Impact:** ${result['monetary_impact_usd']:,}"
        )

    def _handle_search(self, message: str) -> str:
        """Answer a search-style message from the dataset, LLM first."""
        search_terms = self._extract_search_terms(message.lower())
        results = self.search_value_factors(search_terms)

        if not results:
            # No results - let the LLM suggest alternatives if it can.
            data_context = f"No matches found for '{search_terms}'. Need to suggest alternatives."
            llm_response = self.generate_llm_response(message, data_context)
            if llm_response:
                return llm_response
            return f"โ **No matches for '{search_terms}'**\n\n๐ Try: 'air pollution USA', 'water Germany', 'CO2 Japan'"

        data_context = f"Found {len(results)} matches for '{search_terms}': " + "".join(
            f"Match {i}: {result}; " for i, result in enumerate(results[:3], 1)
        )
        llm_response = self.generate_llm_response(message, data_context)
        if llm_response:
            # Append the raw hits so the user can check the LLM's wording.
            quick_reference = "".join(
                f"**{i}.** {self._format_result(result)}\n"
                for i, result in enumerate(results[:3], 1)
            )
            return llm_response + "\n\n๐ **Quick Reference:**\n" + quick_reference

        listing = "".join(
            f"**{i}.** {self._format_result(result)}\n\n"
            for i, result in enumerate(results[:5], 1)
        )
        return f"๐ **Found {len(results)} value factors:**\n\n" + listing

    def process_chat_message(self, message: str, history: List[List[str]]) -> str:
        """Route a chat message to calculation, search, or general chat.

        Args:
            message: The user's message.
            history: Prior turns supplied by the chat UI; currently unused.

        Returns:
            The reply text. "calculate" messages with at least two numbers get
            a calculation; messages containing a search keyword get dataset
            results; everything else goes to the LLM, or to the welcome text
            when no LLM reply is available.
        """
        message_lower = message.lower()

        if "calculate" in message_lower:
            reply = self._handle_calculation(message)
            if reply is not None:
                return reply
        elif any(keyword in message_lower for keyword in self._SEARCH_KEYWORDS):
            return self._handle_search(message)

        llm_response = self.generate_llm_response(message)
        if llm_response:
            return llm_response
        return self._WELCOME_MESSAGE
# Build the single shared assistant instance at module load.
chatbot = GVFDChatbot()


def chat_interface(message, history):
    """Gradio chat callback: hand the turn to the shared ``chatbot``."""
    reply = chatbot.process_chat_message(message, history)
    return reply
# Page banner rendered above the chat widget.
_BANNER_MARKDOWN = """
# ๐ Global Value Factor Database Explorer
**๐ค AI-powered assistant for exploring environmental impact value factors**
**Dataset**: [Global Value Factor Database Refactor V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
**Source**: International Foundation for Valuing Impacts (IFVI)
**Coverage**: 229 countries, environmental impact monetization
**AI**: Local LLM with contextual responses โจ **Completely FREE**
"""

# Example prompts offered by the chat widget.
_EXAMPLE_PROMPTS = [
    "Value factor for CO2 emissions in Germany",
    "Retrieve one value factor in the US for demo",
    "Calculate impact for 100 tons with factor 185.50",
    "What are value factors and why do they vary?",
]

# Assemble the Gradio UI: the banner followed by the chat interface.
with gr.Blocks(title="GVFD Explorer", theme=gr.themes.Soft()) as app:
    gr.Markdown(_BANNER_MARKDOWN)
    chatbot_interface = gr.ChatInterface(
        fn=chat_interface,
        title="๐ค AI-Enhanced GVFD Assistant",
        description="Ask questions about value factors and get intelligent, contextual responses with alternatives and guidance.",
        examples=_EXAMPLE_PROMPTS,
    )

if __name__ == "__main__":
    app.launch()