Spaces:

Satwickchikkala1
/

Indian_car_Bot

Sleeping

App Files Files Community

Satwickchikkala1 committed on Jul 13, 2025

Commit

9f566d2

verified ·

1 Parent(s): 920f99e

Update app.py

Browse files

Files changed (1) hide show

app.py +446 -151

app.py CHANGED Viewed

@@ -4,17 +4,151 @@ import re
 import numpy as np
 from typing import List, Dict, Any
 # Load and clean the dataset
-df = pd.read_csv("indian_car_info.csv")
 # Clean brand and model columns
 df["brand"] = df["brand"].str.strip().str.lower()
 df["model"] = df["model"].str.strip()
-df["features"] = df["features"].astype(str).str.lower()
 # Control long responses
 MAX_TOTAL_CHARACTERS = 5000
 def extract_numbers(text: str) -> List[float]:
     """Extract all numbers from text"""
     return [float(x) for x in re.findall(r'\d+\.?\d*', text)]
@@ -22,271 +156,429 @@ def extract_numbers(text: str) -> List[float]:
 def find_brand_mentions(query: str) -> List[str]:
     """Find all brand mentions in query"""
     unique_brands = df["brand"].unique()
-    return [brand for brand in unique_brands if brand in query.lower()]
 def find_model_mentions(query: str) -> List[str]:
     """Find all model mentions in query"""
     unique_models = df["model"].str.lower().unique()
-    return [model for model in unique_models if model.lower() in query.lower()]
 def extract_price_range(query: str) -> tuple:
     """Extract price range from query"""
     min_price, max_price = None, None
-    # Pattern for "under X", "below X", "less than X"
-    under_match = re.search(r'(?:under|below|less than|up to)\s*₹?(\d+)', query.lower())
     if under_match:
         max_price = float(under_match.group(1))
-    # Pattern for "above X", "more than X", "at least X"
-    above_match = re.search(r'(?:above|more than|at least|over)\s*₹?(\d+)', query.lower())
     if above_match:
         min_price = float(above_match.group(1))
     # Pattern for "between X and Y"
-    between_match = re.search(r'between\s*₹?(\d+)\s*(?:and|to)\s*₹?(\d+)', query.lower())
     if between_match:
-        min_price = float(between_match.group(1))
-        max_price = float(between_match.group(2))
     # Pattern for "around X", "approximately X"
-    around_match = re.search(r'(?:around|approximately|about)\s*₹?(\d+)', query.lower())
     if around_match:
         target = float(around_match.group(1))
-        min_price = target - 2
-        max_price = target + 2
     return min_price, max_price
 def extract_mileage_range(query: str) -> tuple:
     """Extract mileage requirements from query"""
     min_mileage, max_mileage = None, None
-    # Look for mileage-related keywords
-    mileage_keywords = ['mileage', 'fuel efficiency', 'kmpl', 'fuel economy']
-    has_mileage_context = any(keyword in query.lower() for keyword in mileage_keywords)
     if has_mileage_context:
         # Pattern for "above X kmpl", "more than X kmpl"
-        above_match = re.search(r'(?:above|more than|at least|over)\s*(\d+)', query.lower())
         if above_match:
             min_mileage = float(above_match.group(1))
         # Pattern for "below X kmpl", "under X kmpl"
-        below_match = re.search(r'(?:below|under|less than)\s*(\d+)', query.lower())
         if below_match:
             max_mileage = float(below_match.group(1))
     return min_mileage, max_mileage
-def extract_features(query: str) -> List[str]:
-    """Extract feature requirements from query"""
-    feature_keywords = [
-        "sunroof", "automatic", "manual", "cruise control", "abs", "airbags",
-        "android auto", "touchscreen", "rear camera", "parking sensor",
-        "bluetooth", "usb", "keyless", "push button", "climate control",
-        "leather seats", "alloy wheels", "fog lights", "power steering",
-        "power windows", "central locking", "music system", "navigation"
     ]
-    return [feat for feat in feature_keywords if feat in query.lower()]
 def get_comparison_cars(query: str) -> List[Dict]:
-    """Handle comparison queries"""
-    # Look for comparison keywords
-    comparison_words = ['vs', 'versus', 'compare', 'comparison', 'better', 'best']
-    if not any(word in query.lower() for word in comparison_words):
-        return []
-    brands = find_brand_mentions(query)
-    models = find_model_mentions(query)
-    if len(brands) >= 2 or len(models) >= 2:
-        # Return cars for comparison
-        if models:
-            return df[df["model"].str.lower().isin(models)].to_dict('records')
-        else:
-            return df[df["brand"].isin(brands)].to_dict('records')
     return []
 def handle_specific_questions(query: str) -> str:
-    """Handle specific question types"""
     query_lower = query.lower()
-    # Price questions
     if any(word in query_lower for word in ['cheapest', 'lowest price', 'most affordable']):
-        cheapest = df.loc[df['price_lakh'].idxmin()]
-        return f"💰 Cheapest car: {cheapest['brand'].title()} {cheapest['model']} at ₹{cheapest['price_lakh']} Lakh"
     if any(word in query_lower for word in ['most expensive', 'highest price', 'premium']):
-        expensive = df.loc[df['price_lakh'].idxmax()]
-        return f"💎 Most expensive car: {expensive['brand'].title()} {expensive['model']} at ₹{expensive['price_lakh']} Lakh"
-    # Mileage questions
     if any(word in query_lower for word in ['best mileage', 'highest mileage', 'most fuel efficient']):
-        best_mileage = df.loc[df['mileage_kmpl'].idxmax()]
-        return f"⛽ Best mileage car: {best_mileage['brand'].title()} {best_mileage['model']} with {best_mileage['mileage_kmpl']} kmpl"
     if any(word in query_lower for word in ['worst mileage', 'lowest mileage', 'least fuel efficient']):
-        worst_mileage = df.loc[df['mileage_kmpl'].idxmin()]
-        return f"⛽ Lowest mileage car: {worst_mileage['brand'].title()} {worst_mileage['model']} with {worst_mileage['mileage_kmpl']} kmpl"
     # Count questions
     if any(word in query_lower for word in ['how many', 'count', 'number of']):
-        if any(brand in query_lower for brand in df['brand'].unique()):
-            brand = next(brand for brand in df['brand'].unique() if brand in query_lower)
             count = len(df[df['brand'] == brand])
-            return f"📊 {brand.title()} has {count} cars in our database"
-        else:
-            return f"📊 Total cars in database: {len(df)}"
     # Average questions
     if 'average' in query_lower:
         if 'price' in query_lower:
-            avg_price = df['price_lakh'].mean()
-            return f"📊 Average car price: ₹{avg_price:.2f} Lakh"
-        elif 'mileage' in query_lower:
-            avg_mileage = df['mileage_kmpl'].mean()
-            return f"📊 Average mileage: {avg_mileage:.2f} kmpl"
-    # Brand-specific questions
     brands = find_brand_mentions(query)
-    if brands and any(word in query_lower for word in ['models', 'variants', 'options']):
         brand = brands[0]
         brand_cars = df[df['brand'] == brand]
         models = brand_cars['model'].unique()
-        return f"🚗 {brand.title()} models: {', '.join(models)}"
     return ""
 def format_car_details(car: Dict, show_features: bool = True, compact: bool = False) -> str:
-    """Format car details for display"""
     if compact:
-        # Compact format for showing many cars
-        return f"🚗 {car['brand'].title()} {car['model']} | ₹{car['price_lakh']}L | {car['mileage_kmpl']} kmpl | {car['engine']}"
     features_text = ""
-    if show_features and 'features' in car:
-        features = car['features'][:200] + "..." if len(car['features']) > 200 else car['features']
-        features_text = f"- Features: {features.title()}\n"
-    return f"""🚗 {car['brand'].title()} {car['model']}
-- Engine: {car['engine']}
-- Mileage: {car['mileage_kmpl']} kmpl
-- Price: ₹{car['price_lakh']} Lakh
-{features_text}"""
 def answer_question(query: str) -> str:
     if not query.strip():
         return "❓ Please ask me something about Indian cars!"
     query = query.strip()
-    # Handle specific questions first
     specific_answer = handle_specific_questions(query)
     if specific_answer:
         return specific_answer
-    # Handle comparisons
     comparison_cars = get_comparison_cars(query)
     if comparison_cars:
-        response = "📊 Car Comparison:\n\n"
-        for car in comparison_cars[:3]:  # Limit to 3 cars
-            response += format_car_details(car, show_features=False) + "\n"
         return response.strip()
-    # Check for specific car mention (brand + model)
     for _, row in df.iterrows():
         car_name = f"{row['brand']} {row['model']}".lower()
-        if car_name in query.lower():
-            return f"📌 {row['brand'].title()} {row['model']} Details:\n" + format_car_details(row.to_dict())
-    # Start filtering
     filtered_df = df.copy()
     # Filter by brand
     brands = find_brand_mentions(query)
     if brands:
         filtered_df = filtered_df[filtered_df["brand"].isin(brands)]
-    # Filter by model
     models = find_model_mentions(query)
     if models:
         filtered_df = filtered_df[filtered_df["model"].str.lower().isin(models)]
     # Filter by price
     min_price, max_price = extract_price_range(query)
     if min_price is not None:
-        filtered_df = filtered_df[filtered_df["price_lakh"] >= min_price]
     if max_price is not None:
-        filtered_df = filtered_df[filtered_df["price_lakh"] <= max_price]
-    # Filter by mileage
     min_mileage, max_mileage = extract_mileage_range(query)
     if min_mileage is not None:
-        filtered_df = filtered_df[filtered_df["mileage_kmpl"] >= min_mileage]
     if max_mileage is not None:
-        filtered_df = filtered_df[filtered_df["mileage_kmpl"] <= max_mileage]
-    # Filter by features
-    features = extract_features(query)
-    for feature in features:
-        filtered_df = filtered_df[filtered_df["features"].str.contains(feature, na=False)]
-    # Sort results based on query intent
-    if any(word in query.lower() for word in ['cheap', 'affordable', 'budget']):
-        filtered_df = filtered_df.sort_values('price_lakh')
-    elif any(word in query.lower() for word in ['expensive', 'premium', 'luxury']):
-        filtered_df = filtered_df.sort_values('price_lakh', ascending=False)
-    elif any(word in query.lower() for word in ['mileage', 'fuel efficient', 'economy']):
-        filtered_df = filtered_df.sort_values('mileage_kmpl', ascending=False)
-    # Generate response
     if filtered_df.empty:
-        return "❌ No matching cars found for your query. Try adjusting your requirements!"
     response = ""
-    # Check if it's a simple brand query (show all cars from that brand)
     is_simple_brand_query = (
-        len(brands) == 1 and
-        not models and
-        min_price is None and max_price is None and
-        min_mileage is None and max_mileage is None and
-        not features and
-        not any(word in query.lower() for word in ['cheap', 'expensive', 'best', 'compare', 'vs'])
     )
-    if is_simple_brand_query and len(filtered_df) > 3:
-        # Show all cars for simple brand queries in compact format
-        response += f"🏷️ All {brands[0].title()} cars in our database ({len(filtered_df)} models):\n\n"
         for _, row in filtered_df.iterrows():
-            response += format_car_details(row.to_dict(), show_features=False, compact=True) + "\n"
-        # Add a summary
-        avg_price = filtered_df['price_lakh'].mean()
-        avg_mileage = filtered_df['mileage_kmpl'].mean()
-        price_range = f"₹{filtered_df['price_lakh'].min()}-{filtered_df['price_lakh'].max()}L"
-        response += f"\n📊 Summary: Average price: ₹{avg_price:.1f}L | Average mileage: {avg_mileage:.1f} kmpl | Price range: {price_range}"
     else:
-        # Regular detailed format for filtered results
-        if len(filtered_df) > 1:
-            response += f"Found {len(filtered_df)} matching cars:\n\n"
-        # Determine how many cars to show in detail
-        max_detailed_cars = 8 if len(filtered_df) <= 10 else 5
         for _, row in filtered_df.head(max_detailed_cars).iterrows():
-            entry = format_car_details(row.to_dict()) + "\n"
             if len(response + entry) > MAX_TOTAL_CHARACTERS:
                 break
             response += entry
-        if len(filtered_df) > max_detailed_cars:
-            response += f"\n... and {len(filtered_df) - max_detailed_cars} more cars match your criteria."
     return response.strip()
 # Enhanced Gradio interface
@@ -298,9 +590,12 @@ examples = [
     "Best mileage car under 10 lakhs",
     "Mahindra cars with price and mileage",
     "Cars between 5 and 15 lakhs",
-    "Which car has the best features?",
     "Show me all Honda models",
-    "Average price of cars in database"
 ]
 gr.Interface(
@@ -315,7 +610,7 @@ gr.Interface(
         label="Car Information"
     ),
     title="🚘 Enhanced Indian Car AI Assistant",
-    description="Ask me anything about Indian cars! I can help with comparisons, recommendations, specifications, and more.",
     examples=examples,
     theme="soft"
 ).launch()

 import numpy as np
 from typing import List, Dict, Any
# --- Data Loading and Initial Cleaning ---
# Load the dataset. pd.read_csv handles quoted fields, so a `features` cell may
# contain commas as long as it is quoted in the CSV.
try:
    df = pd.read_csv("indian_car_info.csv")
except FileNotFoundError as err:
    # `exit()` is an interactive helper injected by the `site` module and exits
    # with status 0; SystemExit with a message prints it to stderr and exits
    # with status 1, so a missing dataset is reported as a failure.
    raise SystemExit(
        "Error: indian_car_info.csv not found. Please ensure the file is in the same directory."
    ) from err

# Clean brand and model columns
df["brand"] = df["brand"].str.strip().str.lower()
df["model"] = df["model"].str.strip()
# Force features to lower-case strings so substring/regex searches are uniform
df["features"] = df["features"].astype(str).str.lower()
+# --- Advanced Data Preprocessing: Parsing Ranges and Inferring Categories ---
def parse_price_mileage_range(value_str: Any) -> tuple:
    """Parse a price or mileage cell into a ``(min_val, max_val)`` pair.

    Every unsigned number in the text is extracted, so single values
    ("6.49"), ranges ("24.8-25.75"), EV ranges ("452 km range"), estimates
    ("Expected ~18-20") and values carrying currency or unit text
    ("₹6.49 Lakh", "23.27 kmpl") are all handled by the same path.

    Args:
        value_str: Raw cell value; may be a string, a number, or missing.

    Returns:
        ``(min, max)`` as floats (equal for a single value), or
        ``(np.nan, np.nan)`` when the value is missing or holds no number.
    """
    if pd.isna(value_str):
        return np.nan, np.nan

    # Digits with an optional decimal part; separators such as "-", "~" or
    # unit words are simply skipped rather than breaking float().
    numbers = [float(token) for token in re.findall(r'\d+(?:\.\d+)?', str(value_str))]
    if not numbers:
        return np.nan, np.nan
    return min(numbers), max(numbers)
+# Apply the parsing function to create min/max columns
+df[['mileage_kmpl_min', 'mileage_kmpl_max']] = df['mileage_kmpl'].apply(lambda x: pd.Series(parse_price_mileage_range(x)))
+df[['price_lakh_min', 'price_lakh_max']] = df['price_lakh'].apply(lambda x: pd.Series(parse_price_mileage_range(x)))
+# Infer new columns from 'engine' and 'features' for better filtering
def infer_car_attributes(row: pd.Series) -> pd.Series:
    """Infer fuel type, transmission, seating and body type for one car row.

    All inference is keyword based and therefore heuristic; it reads the
    ``engine``, ``features`` and ``model`` fields of ``row``.

    Args:
        row: A DataFrame row exposing ``engine``, ``features`` and ``model``.

    Returns:
        A Series with keys ``fuel_type``, ``transmission``,
        ``seating_capacity`` (int or NaN) and ``body_type``.
    """
    # str() keeps a missing (NaN) cell from raising on .lower()
    engine = str(row['engine']).lower()
    features = str(row['features']).lower()
    model = str(row['model']).lower()

    # Fuel type. Hybrid is tested before electric because a hybrid engine
    # description usually mentions an "electric motor" as well.
    if 'diesel' in engine or 'diesel' in features:
        fuel_type = 'diesel'
    elif 'cng' in engine or 'cng' in features:
        fuel_type = 'cng'
    elif 'hybrid' in engine or 'hybrid' in features:
        fuel_type = 'hybrid'
    elif (
        'electric' in engine
        or 'motor' in engine  # e.g. "Permanent Magnet Synchronous Motor"
        # A bare 'electric' in features would match "electrically adjustable
        # orvms" or "electric sunroof", so only explicit EV phrases count.
        or re.search(r'\b(?:ev|electric (?:vehicle|car|suv|motor|powertrain))\b', features)
    ):
        fuel_type = 'electric'
    else:
        fuel_type = 'petrol'

    # Transmission. Short codes must be whole words: a plain substring test
    # for "at" matches "heated", "seats", "climate", etc. "automatic" is
    # ignored when it describes climate control, lamps or wipers.
    automatic_hint = re.search(
        r'\b(?:at|amt|dct|cvt)\b'
        r'|\bautomatic\b(?!\s+(?:climate|headlamps?|headlights?|wipers?))'
        r'|paddle shifters',
        features,
    )
    transmission = 'automatic' if automatic_hint else 'manual'

    # Seating capacity: "7-seater" first, then "7 seater" (which also covers
    # "6/7 seater", resolved to the larger figure).
    seat_match = re.search(r'(\d+)-seater', features) or re.search(r'(\d+) seater', features)
    seating_capacity = int(seat_match.group(1)) if seat_match else np.nan

    # Body type: first keyword found wins, in this priority order.
    if 'suv' in features or 'suv' in model:
        body_type = 'suv'
    elif 'sedan' in features or 'sedan' in model:
        body_type = 'sedan'
    elif 'hatchback' in features or 'hatchback' in model:
        body_type = 'hatchback'
    elif 'muv' in features:
        body_type = 'muv'
    elif 'pickup truck' in features:
        body_type = 'pickup'
    else:
        body_type = 'other'

    return pd.Series({
        'fuel_type': fuel_type,
        'transmission': transmission,
        'seating_capacity': seating_capacity,
        'body_type': body_type,
    })
+# Apply attribute inference to the DataFrame
+df = df.assign(**df.apply(infer_car_attributes, axis=1).to_dict('list'))
+# Convert numeric columns to appropriate types, coercing errors to NaN
+numeric_cols = ['mileage_kmpl_min', 'mileage_kmpl_max', 'price_lakh_min', 'price_lakh_max', 'seating_capacity']
+for col in numeric_cols:
+    df[col] = pd.to_numeric(df[col], errors='coerce')
 # Control long responses
 MAX_TOTAL_CHARACTERS = 5000
+# --- Helper Functions for Query Parsing ---
 def extract_numbers(text: str) -> List[float]:
     """Return every unsigned number found in ``text`` as a float, in order.

     A number is a run of digits with an optional decimal part ("10", "7.5").
     A leading sign or leading decimal point is not part of the match, so
     "-3" yields 3.0 and ".5" yields 5.0.
     """
     return [float(x) for x in re.findall(r'\d+\.?\d*', text)]
 def find_brand_mentions(query: str) -> List[str]:
     """Find all brand mentions in query"""
     unique_brands = df["brand"].unique()
+    # Use a more specific regex to avoid partial matches and prefer full words
+    found_brands = []
+    for brand in unique_brands:
+        if re.search(r'\b' + re.escape(brand) + r'\b', query.lower()):
+            found_brands.append(brand)
+    return found_brands
 def find_model_mentions(query: str) -> List[str]:
     """Find all model mentions in query"""
     unique_models = df["model"].str.lower().unique()
+    found_models = []
+    for model in unique_models:
+        if re.search(r'\b' + re.escape(model) + r'\b', query.lower()):
+            found_models.append(model)
+    return found_models
 def extract_price_range(query: str) -> tuple:
     """Extract price range from query"""
     min_price, max_price = None, None
+    query = query.lower()
+    # Pattern for "under X", "below X", "less than X", "up to X"
+    under_match = re.search(r'(?:under|below|less than|up to)\s*₹?(\d+\.?\d*)', query)
     if under_match:
         max_price = float(under_match.group(1))
+    # Pattern for "above X", "more than X", "at least X", "over X"
+    above_match = re.search(r'(?:above|more than|at least|over)\s*₹?(\d+\.?\d*)', query)
     if above_match:
         min_price = float(above_match.group(1))
     # Pattern for "between X and Y"
+    between_match = re.search(r'between\s*₹?(\d+\.?\d*)\s*(?:and|to)\s*₹?(\d+\.?\d*)', query)
     if between_match:
+        p1 = float(between_match.group(1))
+        p2 = float(between_match.group(2))
+        min_price = min(p1, p2)
+        max_price = max(p1, p2)
     # Pattern for "around X", "approximately X"
+    around_match = re.search(r'(?:around|approximately|about)\s*₹?(\d+\.?\d*)', query)
     if around_match:
         target = float(around_match.group(1))
+        min_price = target * 0.8 # +/- 20% tolerance for "around"
+        max_price = target * 1.2
     return min_price, max_price
 def extract_mileage_range(query: str) -> tuple:
     """Extract mileage requirements from query"""
     min_mileage, max_mileage = None, None
+    query = query.lower()
+    mileage_keywords = ['mileage', 'fuel efficiency', 'kmpl', 'fuel economy', 'range']
+    has_mileage_context = any(keyword in query for keyword in mileage_keywords)
     if has_mileage_context:
         # Pattern for "above X kmpl", "more than X kmpl"
+        above_match = re.search(r'(?:above|more than|at least|over)\s*(\d+\.?\d*)(?:\s*kmpl|\s*km range)?', query)
         if above_match:
             min_mileage = float(above_match.group(1))
         # Pattern for "below X kmpl", "under X kmpl"
+        below_match = re.search(r'(?:below|under|less than)\s*(\d+\.?\d*)(?:\s*kmpl|\s*km range)?', query)
         if below_match:
             max_mileage = float(below_match.group(1))
     return min_mileage, max_mileage
def extract_features_from_query(query: str) -> List[str]:
    """Return the known feature keywords that appear in ``query``.

    Keywords are matched case-insensitively as whole phrases (an optional
    plural "s"/"es" is tolerated), so "abs" is not found inside
    "absolutely". The returned strings are meant to be searched for in the
    lower-cased ``features`` column.

    Args:
        query: The user's question.

    Returns:
        Matched keywords, in the order they are listed below.
    """
    query = query.lower()
    # Keywords to search for in the 'features' column. All entries are
    # lower-case because the query is lower-cased before matching.
    general_feature_keywords = [
        "sunroof", "panoramic sunroof", "360-degree camera", "head-up display",
        "hud", "wireless charging", "ambient lighting", "cruise control",
        "rear ac vents", "push start/stop", "electrically adjustable orvms",
        "automatic climate control", "digital speedometer", "dual airbags",
        "abs", "ebd", "reverse parking sensors", "connected car tech", "bluelink",
        "adas", "hyundai smartsense", "honda sensing", "ventilated front seats",
        "dual zone climate control", "electronic parking brake", "auto hold",
        "apple carplay", "android auto", "bose premium sound", "jbl sound system",
        "powered tailgate", "terrain response modes", "digital instrument cluster",
        "air purifier", "traction pro mode", "dca automatic", "voice assistant",
        "paddle shifters", "allgrip awd", "4x4", "ladder frame chassis", "hill hold assist",
        "hill descent control", "ventilated front cup holders", "smart hybrid technology",
        "uv-cut glass", "boosterjet engine", "i-smart", "digital bluetooth key",
        "heated orvms", "personal ai assistant", "ultrafast charging", "v2l",
        "sliding center console", "panoramic vision roof", "meridian premium sound",
        "augmented reality hud", "multi-terrain modes", "multi-mode regen", "frunk",
        "voice-enabled sunroof", "dashcam", "footwell lighting", "heated seats",
        "powered driver seat", "triple-zone climate control", "differential lock",
        "active traction control", "dac", "cooler box", "lane watch camera",
        "multi-sense driving modes", "easyfix seats", "detachable 3rd row",
        "digital cockpit", "4motion all-wheel drive", "park assist", "citroen advanced comfort",
        "customization options", "high ground clearance", "comfortable ride",
        "roof mounted rear ac vents", "true 7-seater", "washable interior",
        "tyre pressure monitoring system", "mld technology", "micro hybrid technology",
        "static bending headlamps", "robust build quality", "harmon infotainment",
        "corner stability control", "rain sensing wipers", "automatic headlamps",
        "multi-zone climate control",
    ]

    def mentioned(keyword: str) -> bool:
        # Lookarounds rather than \b so keywords with punctuation
        # ("push start/stop", "4x4") still get clean token boundaries.
        pattern = r'(?<!\w)' + re.escape(keyword) + r'(?:s|es)?(?!\w)'
        return re.search(pattern, query) is not None

    return [keyword for keyword in general_feature_keywords if mentioned(keyword)]
 def get_comparison_cars(query: str) -> List[Dict]:
+    """Handle comparison queries by identifying car names mentioned."""
+    query_lower = query.lower()
+    # Keywords indicating a comparison
+    comparison_words = ['vs', 'versus', 'compare', 'comparison', 'better', 'which is', 'difference between']
+    if not any(word in query_lower for word in comparison_words):
+        return []
+    # Attempt to find two distinct car models for comparison
+    car_names_in_query = []
+    # Create a list of all possible full car names (brand + model) and model names
+    all_car_ids = df['brand'] + ' ' + df['model']
+    all_models = df['model']
+    # Prioritize full car names, then standalone models if they are unique enough
+    for i, full_name in all_car_ids.items():
+        if full_name.lower() in query_lower and full_name.lower() not in [c['full_name'] for c in car_names_in_query]:
+            car_names_in_query.append({'type': 'full', 'name': df.loc[i, 'model'].lower(), 'brand': df.loc[i, 'brand'].lower(), 'full_name': full_name.lower()})
+    # If we found at least two specific cars, proceed
+    if len(car_names_in_query) >= 2:
+        # Filter df to get details of these specific cars
+        compared_models = [car['name'] for car in car_names_in_query]
+        return df[df["model"].str.lower().isin(compared_models)].to_dict('records')
     return []
 def handle_specific_questions(query: str) -> str:
+    """Handle specific question types like 'cheapest', 'best mileage', 'how many', 'average'."""
     query_lower = query.lower()
+    # Price questions (using _min columns)
     if any(word in query_lower for word in ['cheapest', 'lowest price', 'most affordable']):
+        cheapest = df.loc[df['price_lakh_min'].idxmin()]
+        return f"💰 The cheapest car in our database is the {cheapest['brand'].title()} {cheapest['model']} at ₹{cheapest['price_lakh_min']:.2f} Lakh."
     if any(word in query_lower for word in ['most expensive', 'highest price', 'premium']):
+        expensive = df.loc[df['price_lakh_min'].idxmax()]
+        return f"💎 The most expensive car in our database is the {expensive['brand'].title()} {expensive['model']} at ₹{expensive['price_lakh_min']:.2f} Lakh."
+    # Mileage questions (using _min columns)
     if any(word in query_lower for word in ['best mileage', 'highest mileage', 'most fuel efficient']):
+        best_mileage = df.loc[df['mileage_kmpl_min'].idxmax()]
+        # Check if it's an EV range
+        mileage_text = f"{best_mileage['mileage_kmpl_min']} kmpl"
+        if 'electric' in best_mileage['fuel_type'] or 'hybrid' in best_mileage['fuel_type']:
+            mileage_text = f"{best_mileage['mileage_kmpl_min']} km range (Electric/Hybrid)"
+        return f"⛽ The car with the best mileage/range is the {best_mileage['brand'].title()} {best_mileage['model']} with {mileage_text}."
     if any(word in query_lower for word in ['worst mileage', 'lowest mileage', 'least fuel efficient']):
+        worst_mileage = df.loc[df['mileage_kmpl_min'].idxmin()]
+        mileage_text = f"{worst_mileage['mileage_kmpl_min']} kmpl"
+        if 'electric' in worst_mileage['fuel_type'] or 'hybrid' in worst_mileage['fuel_type']:
+            mileage_text = f"{worst_mileage['mileage_kmpl_min']} km range (Electric/Hybrid)"
+        return f"⛽ The car with the lowest mileage/range is the {worst_mileage['brand'].title()} {worst_mileage['model']} with {mileage_text}."
     # Count questions
     if any(word in query_lower for word in ['how many', 'count', 'number of']):
+        brands = find_brand_mentions(query)
+        if brands:
+            brand = brands[0]
             count = len(df[df['brand'] == brand])
+            return f"📊 {brand.title()} has {count} car models in our database."
+        fuel_types = ['petrol', 'diesel', 'electric', 'cng', 'hybrid']
+        for ft in fuel_types:
+            if ft in query_lower and 'car' in query_lower:
+                count = len(df[df['fuel_type'] == ft])
+                return f"📊 There are {count} {ft.title()} car models in our database."
+        transmissions = ['automatic', 'manual']
+        for tr in transmissions:
+            if tr in query_lower and 'car' in query_lower:
+                count = len(df[df['transmission'] == tr])
+                return f"📊 There are {count} {tr.title()} transmission car models in our database."
+        seating_match = re.search(r'(\d+)\s*seat', query_lower)
+        if seating_match:
+            seats = int(seating_match.group(1))
+            count = len(df[df['seating_capacity'] == seats])
+            return f"��� There are {count} car models with {seats} seats in our database."
+        return f"📊 Total cars in database: {len(df)}."
     # Average questions
     if 'average' in query_lower:
         if 'price' in query_lower:
+            avg_price = df['price_lakh_min'].mean()
+            return f"📊 The average minimum car price in our database is ₹{avg_price:.2f} Lakh."
+        elif 'mileage' in query_lower or 'fuel efficiency' in query_lower:
+            avg_mileage = df['mileage_kmpl_min'].mean()
+            return f"📊 The average minimum mileage/range in our database is {avg_mileage:.2f} kmpl/km."
+    # Brand-specific models/variants
     brands = find_brand_mentions(query)
+    if brands and any(word in query_lower for word in ['models', 'variants', 'options', 'cars']):
         brand = brands[0]
         brand_cars = df[df['brand'] == brand]
         models = brand_cars['model'].unique()
+        return f"🚗 {brand.title()} has the following models in our database: {', '.join(models)}."
+    # Handle subjective questions that can't be answered directly
+    if 'best features' in query_lower or 'most luxurious' in query_lower or 'most reliable' in query_lower or 'safest' in query_lower:
+        return "🤔 That's a great question, but 'best' or 'most luxurious' can be subjective! I can tell you about specific features if you ask, or list cars based on quantifiable criteria like price, mileage, or presence of ADAS/sunroof."
     return ""
 def _display_value(value: Any, default: Any = 'N/A') -> Any:
     """Return ``value``, or ``default`` when it is None or a missing scalar (NaN/NA).

     Rows converted with ``row.to_dict()`` keep their keys even when the cell
     is empty, so ``dict.get(key, 'N/A')`` alone never yields the placeholder.
     """
     if value is None:
         return default
     try:
         missing = bool(pd.isna(value))
     except (TypeError, ValueError):
         # pd.isna on list-like input returns an array whose truth value is
         # ambiguous; such a value is not a missing scalar.
         missing = False
     return default if missing else value

 def format_car_details(car: Dict, show_features: bool = True, compact: bool = False) -> str:
     """Format one car record for display.

     Args:
         car: Mapping of column name to value. Keys read here: ``brand``,
             ``model``, ``engine``, ``fuel_type``, ``transmission``,
             ``mileage_kmpl_min``, ``price_lakh_min``, ``price_lakh_max``,
             ``seating_capacity``, ``body_type`` and ``features``. Absent,
             None and NaN values are rendered as ``N/A`` (or omitted for the
             optional seating / body-type / features lines).
         show_features: Append a "Key Features" line (capped at 200
             characters) to the detailed layout.
         compact: Return a single pipe-separated summary line instead of the
             multi-line layout; ``show_features`` is ignored in that case.

     Returns:
         The compact one-liner (no trailing newline), or the detailed block
         with every line terminated by a newline.
     """
     def field(key: str, default: Any = 'N/A') -> Any:
         return _display_value(car.get(key), default)

     # str() guards against non-string cells so .title()/.lower() cannot fail.
     brand = str(field('brand', '')).title()
     model = field('model', '')
     fuel_type = str(field('fuel_type'))

     # Electric and hybrid entries are labelled as a range rather than kmpl.
     fuel_lower = fuel_type.lower()
     unit = 'km range' if ('electric' in fuel_lower or 'hybrid' in fuel_lower) else 'kmpl'
     mileage_text = f"{field('mileage_kmpl_min')} {unit}"

     if compact:
         return (
             f"🚗 {brand} {model} | "
             f"₹{field('price_lakh_min')}L | {mileage_text} | "
             f"{field('engine')}"
         )

     lines = [
         f"🚗 {brand} {model}",
         f"- Engine: {field('engine')}",
         f"- Fuel Type: {fuel_type.title()}",
         f"- Transmission: {str(field('transmission')).title()}",
         f"- Mileage/Range: {mileage_text}",
         f"- Price: ₹{field('price_lakh_min')}-{field('price_lakh_max')} Lakh",
     ]

     seating = field('seating_capacity', None)
     if seating is not None:
         lines.append(f"- Seating: {int(seating)}-seater")

     body_type = field('body_type', None)
     if body_type is not None:
         lines.append(f"- Body Type: {str(body_type).title()}")

     if show_features:
         features = field('features', None)
         if features is not None:
             features = str(features)
             # Limit features to 200 chars and append "..." if truncated.
             display_features = features[:200]
             if len(features) > 200:
                 display_features += "..."
             lines.append(f"- Key Features: {display_features.title()}")

     return "\n".join(lines) + "\n"
+# --- Main Answer Function ---
 def _mentions_word(text: str, *words: str) -> bool:
     """Return True when any of ``words`` occurs in ``text`` as a whole word.

     Plain substring checks misfire on short tokens ("ev" in "seven",
     "vs" in "suvs", "auto" in "automobile"), so those tokens are matched on
     word boundaries instead.
     """
     return any(re.search(rf"\b{re.escape(word)}\b", text) for word in words)

 def answer_question(query: str) -> str:
     """Answer a free-text question about the cars in ``df``.

     Handlers are tried in order and the first one that produces text wins:

     1. ``handle_specific_questions`` (e.g. "cheapest car").
     2. ``get_comparison_cars`` (e.g. "Creta vs Seltos").
     3. A single car named together with a details-style keyword.
     4. Keyword-driven filtering of ``df`` by brand, model, price, mileage,
        fuel type, transmission, seating, body type and features.

     Args:
         query: Raw user text; None or blank input yields a prompt message.

     Returns:
         A display-ready string, bounded by ``MAX_TOTAL_CHARACTERS`` apart
         from short trailing notices.
     """
     if not query or not query.strip():
         return "❓ Please ask me something about Indian cars!"
     query = query.strip()
     query_lower = query.lower()  # Use this for all case-insensitive checks

     # 1. Handle specific questions (e.g., "cheapest car", "how many Maruti cars")
     specific_answer = handle_specific_questions(query)
     if specific_answer:
         return specific_answer

     # 2. Handle direct car comparisons (e.g., "Creta vs Seltos")
     comparison_cars = get_comparison_cars(query)
     if comparison_cars:
         response = "📊 Here's a comparison of the cars you asked about:\n\n"
         for car in comparison_cars:
             response += format_car_details(car, show_features=True) + "\n"
             if len(response) > MAX_TOTAL_CHARACTERS * 0.8:  # Limit comparison length
                 response += "\n... (some details truncated for brevity)\n"
                 break
         return response.strip()

     # 3. Single specific car (e.g., "details of Tata Nexon"). Kept after the
     # comparison step so "compare Nexon vs Harrier" is handled above. The
     # keyword test does not depend on the row, so it gates the whole scan.
     detail_keywords = ('details', 'info', 'specifications', 'tell me about', 'what is the')
     if any(keyword in query_lower for keyword in detail_keywords):
         for _, row in df.iterrows():
             # Both brand and model must be mentioned; this also covers the
             # contiguous "<brand> <model>" form.
             if row['model'].lower() in query_lower and row['brand'].lower() in query_lower:
                 return f"📌 {row['brand'].title()} {row['model']} Details:\n" + format_car_details(row.to_dict())

     # 4. General filtering based on criteria
     filtered_df = df.copy()

     # Filter by brand
     brands = find_brand_mentions(query)
     if brands:
         filtered_df = filtered_df[filtered_df["brand"].isin(brands)]

     # Filter by model (if specific models are requested alongside other filters)
     models = find_model_mentions(query)
     if models:
         filtered_df = filtered_df[filtered_df["model"].str.lower().isin(models)]

     # Filter by price: the lower bound is checked against the cheapest
     # variant, the upper bound against the most expensive one.
     min_price, max_price = extract_price_range(query)
     if min_price is not None:
         filtered_df = filtered_df[filtered_df["price_lakh_min"] >= min_price]
     if max_price is not None:
         filtered_df = filtered_df[filtered_df["price_lakh_max"] <= max_price]

     # Filter by mileage/range (same min/max column convention as price)
     min_mileage, max_mileage = extract_mileage_range(query)
     if min_mileage is not None:
         filtered_df = filtered_df[filtered_df["mileage_kmpl_min"] >= min_mileage]
     if max_mileage is not None:
         filtered_df = filtered_df[filtered_df["mileage_kmpl_max"] <= max_mileage]

     # Filter by fuel type.
     # NOTE(review): isin() is an exact match, so values such as
     # "strong hybrid" would not match 'hybrid' - confirm how the fuel_type
     # column is normalised when the dataset is loaded.
     fuel_types_in_query = []
     if 'petrol' in query_lower:
         fuel_types_in_query.append('petrol')
     if 'diesel' in query_lower:
         fuel_types_in_query.append('diesel')
     # "ev" must be a whole word, otherwise "seven" or "every" select EVs.
     if 'electric' in query_lower or _mentions_word(query_lower, 'ev', 'evs'):
         fuel_types_in_query.append('electric')
     if 'cng' in query_lower:
         fuel_types_in_query.append('cng')
     if 'hybrid' in query_lower:
         fuel_types_in_query.append('hybrid')
     if fuel_types_in_query:
         filtered_df = filtered_df[filtered_df['fuel_type'].isin(fuel_types_in_query)]

     # Filter by transmission ("auto" only as a whole word, so "automobile"
     # does not trigger the filter)
     transmission_types_in_query = []
     if 'automatic' in query_lower or _mentions_word(query_lower, 'auto'):
         transmission_types_in_query.append('automatic')
     if 'manual' in query_lower:
         transmission_types_in_query.append('manual')
     if transmission_types_in_query:
         filtered_df = filtered_df[filtered_df['transmission'].isin(transmission_types_in_query)]

     # Filter by seating capacity; accepts "7 seater", "7-seater" and "7 seats"
     seating_match = re.search(r'(\d+)[\s-]*(?:seater|seats?)\b', query_lower)
     if seating_match:
         seats = int(seating_match.group(1))
         filtered_df = filtered_df[filtered_df['seating_capacity'] == seats]

     # Filter by body type
     body_types_in_query = [
         body_type
         for body_type in ('suv', 'sedan', 'hatchback', 'muv', 'pickup')
         if body_type in query_lower
     ]
     if body_types_in_query:
         filtered_df = filtered_df[filtered_df['body_type'].isin(body_types_in_query)]

     # Filter by general features (from 'features' column); every requested
     # keyword must be present.
     general_features_from_query = extract_features_from_query(query)
     for feature_keyword in general_features_from_query:
         filtered_df = filtered_df[filtered_df["features"].str.contains(feature_keyword, na=False)]

     # Final response generation
     if filtered_df.empty:
         return "❌ No matching cars found for your query. Please try adjusting your requirements or asking a broader question."

     response = ""

     # Sort results based on query intent
     if any(word in query_lower for word in ['cheap', 'affordable', 'budget', 'lowest price']):
         filtered_df = filtered_df.sort_values('price_lakh_min')
     elif any(word in query_lower for word in ['expensive', 'premium', 'luxury', 'highest price']):
         filtered_df = filtered_df.sort_values('price_lakh_min', ascending=False)
     elif any(word in query_lower for word in ['mileage', 'fuel efficient', 'economy', 'best mileage']):
         filtered_df = filtered_df.sort_values('mileage_kmpl_min', ascending=False)
     else:  # Default sort
         filtered_df = filtered_df.sort_values('price_lakh_min')

     total_matches = len(filtered_df)

     # A query that names exactly one brand and nothing else gets a compact
     # list. Bounds are compared with None so that an explicit 0 still counts
     # as a filter, and the keyword lists themselves are checked so that
     # aliases ("ev", "auto") count as filters too.
     has_range_filter = any(
         bound is not None
         for bound in (min_price, max_price, min_mileage, max_mileage)
     )
     has_keyword_filter = bool(
         models or general_features_from_query or fuel_types_in_query
         or transmission_types_in_query or body_types_in_query or seating_match
     )
     has_intent_word = (
         any(word in query_lower for word in ['cheap', 'expensive', 'best', 'compare', 'average', 'how many'])
         or _mentions_word(query_lower, 'vs', 'count')
     )
     is_simple_brand_query = (
         len(brands) == 1 and total_matches > 5
         and not has_range_filter
         and not has_keyword_filter
         and not has_intent_word
     )

     if is_simple_brand_query:
         response += f"🏷️ Found {total_matches} {brands[0].title()} models. Here's a summary:\n\n"
         for _, row in filtered_df.iterrows():
             entry = format_car_details(row.to_dict(), show_features=False, compact=True) + "\n"
             if len(response + entry) > MAX_TOTAL_CHARACTERS * 0.9:
                 response += "\n... (further results truncated for length)\n"
                 break
             response += entry
         # Add a summary for simple brand queries
         avg_price = filtered_df['price_lakh_min'].mean()
         avg_mileage = filtered_df['mileage_kmpl_min'].mean()
         price_range = f"₹{filtered_df['price_lakh_min'].min():.2f}-{filtered_df['price_lakh_max'].max():.2f} Lakh"
         response += f"\n📊 Summary for {brands[0].title()} cars: Avg. Min Price: ₹{avg_price:.2f}L | Avg. Min Mileage/Range: {avg_mileage:.2f} | Price Range: {price_range}"
     else:
         # For more specific filters, show more detailed info, up to a limit
         response += f"Found {total_matches} matching cars:\n\n" if total_matches > 1 else "Found 1 matching car:\n\n"
         max_detailed_cars = 8 if total_matches <= 10 else 5
         shown = 0
         cut_by_length = False
         for _, row in filtered_df.head(max_detailed_cars).iterrows():
             entry = format_car_details(row.to_dict(), show_features=True) + "\n"
             if len(response + entry) > MAX_TOTAL_CHARACTERS:
                 cut_by_length = True
                 break
             response += entry
             shown += 1
         # Exactly one trailer, counted from what was actually printed.
         remaining = total_matches - shown
         if remaining > 0:
             response += f"\n... and {remaining} more cars match your criteria."
             if cut_by_length:
                 response += " Please refine your search."

     return response.strip()
 # Enhanced Gradio interface
     "Best mileage car under 10 lakhs",
     "Mahindra cars with price and mileage",
     "Cars between 5 and 15 lakhs",
     "Show me all Honda models",
+    "Average price of cars in database",
+    "Electric cars with ADAS",
+    "SUVs with 7 seats and good mileage",
+    "Diesel cars from Toyota",
+    "Tell me about the Skoda Slavia"
 ]
 gr.Interface(
         label="Car Information"
     ),
     title="🚘 Enhanced Indian Car AI Assistant",
+    description="Ask me anything about Indian cars! I can help with comparisons, recommendations, specifications, and more. Data based on 'indian_car_info.csv'.",
     examples=examples,
     theme="soft"
 ).launch()