import gradio as gr
import pandas as pd
import os
import re
import html
import time
import sys
from pathlib import Path

# Import Groq API client
try:
    from groq import Groq
except ImportError:
    # NOTE(review): `Groq` stays undefined after this branch, so any later
    # use of it will raise NameError -- confirm callers guard against that.
    print("Groq API not installed. Run: pip install groq")


# Function to load all CSV files from a directory (current directory by default)
def load_csv_files(directory="."):
    """Load every ``*_sorted.csv`` file found in *directory*.

    Args:
        directory: Folder to scan. Defaults to ``"."`` (the current working
            directory), which is what a call without arguments always did.

    Returns:
        A dict mapping a display city name to its DataFrame. The name is the
        file stem with ``_sorted`` removed, underscores replaced by spaces,
        and title-cased. NaN cells are replaced by empty strings.

    Files that fail to load are reported on stdout and skipped.
    """
    csv_files = {}
    # Path.glob yields matches in filesystem order; sorting keeps the
    # resulting dict (and which file wins a name collision) deterministic.
    for file in sorted(Path(directory).glob("*_sorted.csv")):
        try:
            df = pd.read_csv(file, encoding='utf-8')
            # Fill NaN values with empty strings to avoid issues
            df = df.fillna("")
        except Exception as e:
            # Deliberately best-effort: one bad file must not stop the rest.
            print(f"Error loading {file}: {e}")
        else:
            # Clean the city name from the filename
            city_name = file.stem.replace('_sorted', '')
            city_name = city_name.replace('_', ' ').title()
            csv_files[city_name] = df
    return csv_files


# Function to get unique queries for a specific city
def get_queries_for_city(city):
    """Return the sorted, distinct, non-blank queries stored for *city*.

    Reads the module-level ``all_data`` mapping (defined elsewhere in this
    file).

    Args:
        city: Key into ``all_data``.

    Returns:
        A sorted list of query strings. Empty when the city is unknown or
        its data has no ``query`` column.
    """
    if city not in all_data:
        return []

    data = all_data[city]
    # A CSV without a 'query' column has no queries; do not raise KeyError.
    if 'query' not in data.columns:
        return []

    # Get unique queries from the dataframe
    raw_queries = data['query'].dropna().unique().tolist()
    # Drop empty/blank values; de-duplicate again after str() conversion so
    # values that only differ by type (e.g. 1 and "1") appear once.
    return sorted({str(q) for q in raw_queries if q and str(q).strip()})


# Function to find entries that have empty or missing queries
def find_empty_queries(city, preserve_order=True):
    # NOTE(review): only the opening of this function is reproduced here;
    # the remaining statements follow this block in the file.
    data = all_data.get(city)
    if data is None:
        return "City data not found"

    results = []
    for i, row in data.iterrows():
        # Check if query is empty or NaN
        if pd.isna(row['query']) or str(row['query']).strip() == "":
            # Make sure all values are strings and handle NaN/None values
            context = str(row['context']) if not pd.isna(row['context']) else ""
            query = "(No Query)" if pd.isna(row['query']) else str(row['query'])
            url = str(row['url']) if not pd.isna(row['url']) else ""
            results.append({
                'url': url,
                'context': context,
                'query': query,
                'original_index': i  # Store the original row index
            })

    # Format results using the same HTML formatting as search_data
    if not results:
        return "No entries without queries found"

    # Sort results by their original index if preserve_order is True
if preserve_order: results.sort(key=lambda x: x['original_index']) # Create HTML formatted results for clickable links with better styling formatted_results = "
URL: {url_safe}
" # Handle context display safely context = result['context'] try: context_preview = context[:300] + ('...' if len(context) > 300 else '') context_preview = html.escape(context_preview) except (TypeError, AttributeError): context_preview = html.escape(str(context)) formatted_results += f"Context: {context_preview}
" formatted_results += "URL: {url_safe}
" formatted_results += f"Query: {html.escape(str(result['query']))}
" # Handle context display safely context = result['context'] try: context_preview = context[:300] + ('...' if len(context) > 300 else '') context_preview = html.escape(context_preview) except (TypeError, AttributeError): context_preview = html.escape(str(context)) formatted_results += f"Context: {context_preview}
" formatted_results += "") formatted_answer = f"
{formatted_answer}
" html_answer += f"") formatted_thinking = f"
{formatted_thinking}
" html_answer += f"Generated using moonshotai/kimi-k2-instruct-0905 in {completion_time:.2f} seconds