Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import re | |
| import numpy as np | |
| from typing import List, Dict, Any | |
# Load the car catalogue once at import time.
df = pd.read_csv("indian_car_info.csv")

# Normalise the text columns used for matching:
#   brand    -> trimmed and lower-cased
#   model    -> trimmed only (original casing is kept for display)
#   features -> coerced to str, then lower-cased
df = df.assign(
    brand=df["brand"].str.strip().str.lower(),
    model=df["model"].str.strip(),
    features=df["features"].astype(str).str.lower(),
)

# Upper bound on the number of characters in a multi-car reply.
MAX_TOTAL_CHARACTERS = 3000
def extract_numbers(text: str) -> List[float]:
    """Return every unsigned integer or decimal literal in *text* as a float.

    Numbers are returned in the order they appear; an input with no digits
    yields an empty list.
    """
    tokens = re.findall(r'\d+\.?\d*', text)
    return list(map(float, tokens))
def find_brand_mentions(query: str) -> List[str]:
    """Return every known brand whose name occurs as a substring of *query*.

    Matching is case-insensitive on the query side; brand names are taken
    from the ``brand`` column of the module-level ``df`` as stored.
    """
    haystack = query.lower()
    matches = []
    for name in df["brand"].unique():
        if name in haystack:
            matches.append(name)
    return matches
def find_model_mentions(query: str) -> List[str]:
    """Return every known model (lower-cased) that occurs as a substring of *query*.

    Model names come from the ``model`` column of the module-level ``df``;
    both sides of the comparison are lower-cased.
    """
    haystack = query.lower()
    known_models = df["model"].str.lower().unique()
    matches = []
    for name in known_models:
        if name.lower() in haystack:
            matches.append(name)
    return matches
def extract_price_range(query: str) -> tuple:
    """Extract a ``(min_price, max_price)`` pair from a free-text query.

    Recognised phrasings (case-insensitive, optional ``₹`` before the number):

    * "under / below / less than / up to X"  -> upper bound
    * "above / more than / at least / over X" -> lower bound
    * "between X and Y" / "between X to Y"    -> both bounds
    * "around / approximately / about X"      -> X - 2 .. X + 2

    Later rules override earlier ones, so "between" wins over "under"/"above"
    and "around" wins over everything.

    Args:
        query: The user's question.

    Returns:
        A 2-tuple of floats; either element is ``None`` when the query does
        not constrain that side.
    """
    text = query.lower()

    # One amount: optional rupee sign, integer or decimal.  The first
    # lookahead stops the regex from backtracking into a shorter prefix of
    # the number; the second rejects figures that carry a mileage unit, so
    # "above 20 kmpl" is not mistaken for a price.
    amount = r'\s*₹?(\d+(?:\.\d+)?)(?!\.?\d)(?!\s*(?:kmpl|km/l|kpl)\b)'

    min_price, max_price = None, None

    under_match = re.search(r'(?:under|below|less than|up to)' + amount, text)
    if under_match:
        max_price = float(under_match.group(1))

    above_match = re.search(r'(?:above|more than|at least|over)' + amount, text)
    if above_match:
        min_price = float(above_match.group(1))

    between_match = re.search(
        r'between' + amount + r'\s*(?:and|to)' + amount, text
    )
    if between_match:
        min_price = float(between_match.group(1))
        max_price = float(between_match.group(2))

    around_match = re.search(r'(?:around|approximately|about)' + amount, text)
    if around_match:
        target = float(around_match.group(1))
        # "around X" is treated as a +/- 2 window centred on X.
        min_price = target - 2
        max_price = target + 2

    return min_price, max_price
def extract_mileage_range(query: str) -> tuple:
    """Extract a ``(min_mileage, max_mileage)`` pair from a free-text query.

    Bounds are only looked for when the query mentions mileage at all
    ("mileage", "fuel efficiency", "kmpl", "fuel economy").  Numbers that are
    followed by a price unit (lakh / lac / L / crore / cr) are ignored so that
    "good mileage under 10 lakhs" does not become a 10 kmpl ceiling.

    Args:
        query: The user's question.

    Returns:
        A 2-tuple of floats; either element is ``None`` when the query does
        not constrain that side.
    """
    min_mileage, max_mileage = None, None
    text = query.lower()

    mileage_keywords = ('mileage', 'fuel efficiency', 'kmpl', 'fuel economy')
    if not any(keyword in text for keyword in mileage_keywords):
        return min_mileage, max_mileage

    # Integer or decimal figure.  The first lookahead prevents backtracking
    # into a shorter prefix of the number; the second rejects price figures.
    figure = (
        r'\s*(\d+(?:\.\d+)?)(?!\.?\d)'
        r'(?!\s*(?:lakhs?|lacs?|l|crores?|cr)\b)'
    )

    above_match = re.search(r'(?:above|more than|at least|over)' + figure, text)
    if above_match:
        min_mileage = float(above_match.group(1))

    below_match = re.search(r'(?:below|under|less than)' + figure, text)
    if below_match:
        max_mileage = float(below_match.group(1))

    return min_mileage, max_mileage
def extract_features(query: str) -> List[str]:
    """Return the known feature keywords that occur as substrings of *query*.

    The comparison is case-insensitive and results follow the order of the
    keyword catalogue, not the order of appearance in the query.
    """
    lowered = query.lower()
    catalogue = (
        "sunroof", "automatic", "manual", "cruise control", "abs", "airbags",
        "android auto", "touchscreen", "rear camera", "parking sensor",
        "bluetooth", "usb", "keyless", "push button", "climate control",
        "leather seats", "alloy wheels", "fog lights", "power steering",
        "power windows", "central locking", "music system", "navigation",
    )
    requested = []
    for keyword in catalogue:
        if keyword in lowered:
            requested.append(keyword)
    return requested
def get_comparison_cars(query: str) -> List[Dict]:
    """Return the catalogue rows to show for a comparison-style query.

    A query qualifies when it contains one of the comparison trigger words
    and mentions at least two brands or at least two models.  Rows are
    selected by model when any model was mentioned, otherwise by brand.
    Returns an empty list when the query is not a comparison.
    """
    lowered = query.lower()
    triggers = ('vs', 'versus', 'compare', 'comparison', 'better', 'best')
    if all(trigger not in lowered for trigger in triggers):
        return []

    brands = find_brand_mentions(query)
    models = find_model_mentions(query)
    if len(brands) < 2 and len(models) < 2:
        return []

    # Model mentions take precedence over brand mentions.
    if models:
        selector = df["model"].str.lower().isin(models)
    else:
        selector = df["brand"].isin(brands)
    return df[selector].to_dict('records')
def handle_specific_questions(query: str) -> str:
    """Answer superlative, count, average and "which models" questions directly.

    The checks run in a fixed order and the first one that matches wins:
    cheapest, most expensive, best mileage, worst mileage, counts, averages,
    then the model list for a mentioned brand.  All matching is plain
    substring matching on the lower-cased query.

    Args:
        query: The user's question.

    Returns:
        A one-line answer, or ``""`` when none of the shortcuts apply.
    """
    query_lower = query.lower()

    def mentions(*phrases: str) -> bool:
        # True when any of the phrases occurs in the query.
        return any(phrase in query_lower for phrase in phrases)

    def label(car) -> str:
        # "Brand Model" with the brand title-cased for display.
        return f"{car['brand'].title()} {car['model']}"

    # Price questions
    if mentions('cheapest', 'lowest price', 'most affordable'):
        cheapest = df.loc[df['price_lakh'].idxmin()]
        return f"💰 Cheapest car: {label(cheapest)} at ₹{cheapest['price_lakh']} Lakh"

    if mentions('most expensive', 'highest price', 'premium'):
        expensive = df.loc[df['price_lakh'].idxmax()]
        return f"💎 Most expensive car: {label(expensive)} at ₹{expensive['price_lakh']} Lakh"

    # Mileage questions
    if mentions('best mileage', 'highest mileage', 'most fuel efficient'):
        best_mileage = df.loc[df['mileage_kmpl'].idxmax()]
        return f"⛽ Best mileage car: {label(best_mileage)} with {best_mileage['mileage_kmpl']} kmpl"

    if mentions('worst mileage', 'lowest mileage', 'least fuel efficient'):
        worst_mileage = df.loc[df['mileage_kmpl'].idxmin()]
        return f"⛽ Lowest mileage car: {label(worst_mileage)} with {worst_mileage['mileage_kmpl']} kmpl"

    # Count questions: per-brand when a brand is named, otherwise the total.
    if mentions('how many', 'count', 'number of'):
        brand = next(
            (name for name in df['brand'].unique() if name in query_lower),
            None,
        )
        if brand is not None:
            count = len(df[df['brand'] == brand])
            return f"📊 {brand.title()} has {count} cars in our database"
        return f"📊 Total cars in database: {len(df)}"

    # Average questions (falls through when neither price nor mileage is named)
    if 'average' in query_lower:
        if 'price' in query_lower:
            avg_price = df['price_lakh'].mean()
            return f"📈 Average car price: ₹{avg_price:.2f} Lakh"
        if 'mileage' in query_lower:
            avg_mileage = df['mileage_kmpl'].mean()
            return f"📈 Average mileage: {avg_mileage:.2f} kmpl"

    # Brand-specific questions: list the models of the first mentioned brand.
    brands = find_brand_mentions(query)
    if brands and mentions('models', 'variants', 'options'):
        brand = brands[0]
        models = df.loc[df['brand'] == brand, 'model'].unique()
        # str() guards the join against non-string model values.
        return f"🚗 {brand.title()} models: {', '.join(str(m) for m in models)}"

    return ""
def format_car_details(car: Dict, show_features: bool = True) -> str:
    """Render one car record as a short multi-line text card.

    Args:
        car: Mapping with at least ``brand``, ``model``, ``engine``,
            ``mileage_kmpl`` and ``price_lakh``; ``features`` is optional.
        show_features: When true and ``features`` is present, append a
            features line, truncated to 200 characters plus ``"..."``.

    Returns:
        The formatted card; every line, including the last, ends with a
        newline.
    """
    lines = [
        f"🚗 {car['brand'].title()} {car['model']}",
        f"- Engine: {car['engine']}",
        f"- Mileage: {car['mileage_kmpl']} kmpl",
        f"- Price: ₹{car['price_lakh']} Lakh",
    ]

    if show_features and 'features' in car:
        # Coerce to str so a non-string value cannot break slicing/title().
        features = str(car['features'])
        if len(features) > 200:
            features = features[:200] + "..."
        lines.append(f"- Features: {features.title()}")

    return "\n".join(lines) + "\n"
def answer_question(query: str) -> str:
    """Answer a free-text question about the car catalogue.

    Resolution order:

    1. Direct answers from ``handle_specific_questions`` (cheapest, averages, ...).
    2. Comparison queries (first three matching rows, without features).
    3. An exact "brand model" mention, answered with that car's details.
    4. Otherwise filter the catalogue by brand, model, price, mileage and
       features, sort by the apparent intent, and list up to five cars.

    Args:
        query: The user's question; ``None``, empty or whitespace-only input
            yields a prompt to ask something.

    Returns:
        The reply text shown to the user.
    """
    if not query or not query.strip():
        return "❓ Please ask me something about Indian cars!"

    query = query.strip()
    query_lower = query.lower()

    # Handle specific questions first
    specific_answer = handle_specific_questions(query)
    if specific_answer:
        return specific_answer

    # Handle comparisons
    comparison_cars = get_comparison_cars(query)
    if comparison_cars:
        response = "🔍 Car Comparison:\n\n"
        for car in comparison_cars[:3]:  # Limit to 3 cars
            response += format_car_details(car, show_features=False) + "\n"
        return response.strip()

    # Check for a specific car mention (brand + model)
    for _, row in df.iterrows():
        car_name = f"{row['brand']} {row['model']}".lower()
        if car_name in query_lower:
            return (
                f"🚗 {row['brand'].title()} {row['model']} Details:\n"
                + format_car_details(row.to_dict())
            )

    # Start filtering
    filtered_df = df.copy()

    brands = find_brand_mentions(query)
    if brands:
        filtered_df = filtered_df[filtered_df["brand"].isin(brands)]

    models = find_model_mentions(query)
    if models:
        filtered_df = filtered_df[filtered_df["model"].str.lower().isin(models)]

    min_price, max_price = extract_price_range(query)
    if min_price is not None:
        filtered_df = filtered_df[filtered_df["price_lakh"] >= min_price]
    if max_price is not None:
        filtered_df = filtered_df[filtered_df["price_lakh"] <= max_price]

    min_mileage, max_mileage = extract_mileage_range(query)
    if min_mileage is not None:
        filtered_df = filtered_df[filtered_df["mileage_kmpl"] >= min_mileage]
    if max_mileage is not None:
        filtered_df = filtered_df[filtered_df["mileage_kmpl"] <= max_mileage]

    # Every requested feature must be present in the features text.
    for feature in extract_features(query):
        filtered_df = filtered_df[filtered_df["features"].str.contains(feature, na=False)]

    # Sort results based on query intent
    if any(word in query_lower for word in ('cheap', 'affordable', 'budget')):
        filtered_df = filtered_df.sort_values('price_lakh')
    elif any(word in query_lower for word in ('expensive', 'premium', 'luxury')):
        filtered_df = filtered_df.sort_values('price_lakh', ascending=False)
    elif any(word in query_lower for word in ('mileage', 'fuel efficient', 'economy')):
        filtered_df = filtered_df.sort_values('mileage_kmpl', ascending=False)

    # Generate response
    if filtered_df.empty:
        return "❌ No matching cars found for your query. Try adjusting your requirements!"

    total = len(filtered_df)
    response = ""
    if total > 1:
        response += f"Found {total} matching cars:\n\n"

    shown = 0
    for _, row in filtered_df.head(5).iterrows():  # Show top 5 results
        entry = format_car_details(row.to_dict()) + "\n"
        if len(response) + len(entry) > MAX_TOTAL_CHARACTERS:
            break
        response += entry
        shown += 1

    # Count what was actually left out, which may be more than total - 5
    # when the character cap stopped the listing early.
    remaining = total - shown
    if remaining > 0:
        response += f"\n... and {remaining} more cars match your criteria."

    return response.strip()
# Enhanced Gradio interface
# Sample questions offered as one-click examples in the UI.
examples = [
    "Show me Maruti cars",
    "What's the mileage of Tata Nexon?",
    "Compare Hyundai Creta vs Tata Harrier",
    "Best mileage car under 10 lakhs",
    "Mahindra cars with price and mileage",
    "Cars between 5 and 15 lakhs",
    "Which car has the best features?",
    "Average price of cars in database",
]

# Single text-in / text-out interface backed by answer_question.
demo = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Ask me anything about Indian cars! E.g., 'Best mileage car under 10L', 'Compare Creta vs Harrier'",
        label="Your Question",
    ),
    outputs=gr.Textbox(
        lines=15,
        label="Car Information",
    ),
    title="🚗 Enhanced Indian Car AI Assistant",
    description="Ask me anything about Indian cars! I can help with comparisons, recommendations, specifications, and more.",
    examples=examples,
    theme="soft",
)

# Launched at module level, as in the original script.
demo.launch()