# ========================================== # Smart Excel Analyst - Hugging Face Space # Nepali + English Excel Analytics with AI Chat # ========================================== import gradio as gr import pandas as pd import os import plotly.graph_objects as go from groq import Groq from unidecode import unidecode from indic_transliteration import sanscript # ========================================== # CONFIG # ========================================== GROQ_API_KEY = os.getenv("GROQ_API_KEY", "") if not GROQ_API_KEY: GROQ_API_KEY = "your-groq-api-key-here" current_df = None client = None def get_client(): global client if client is None and GROQ_API_KEY and GROQ_API_KEY != "your-groq-api-key-here": client = Groq(api_key=GROQ_API_KEY) return client # ========================================== # HELPERS # ========================================== def format_num(n): try: if pd.isna(n): return "—" n = float(n) if abs(n) >= 10_000_000: return f"₹{n/10_000_000:.2f} करोड" elif abs(n) >= 100_000: return f"₹{n/100_000:.2f} लाख" elif abs(n) >= 1_000: return f"₹{n:,.0f}" else: return f"₹{n:,.2f}" except: return str(n) def df_to_chunks(df, chunk_size=50): chunks = [] total_rows = len(df) for start in range(0, total_rows, chunk_size): end = min(start + chunk_size, total_rows) chunk_df = df.iloc[start:end] rows_text = [] for idx, row in chunk_df.iterrows(): row_str = " | ".join([f"{col}: {row[col]}" for col in df.columns]) rows_text.append(f"Row {idx + 1}: {row_str}") chunks.append("\n".join(rows_text)) return chunks def find_relevant_chunks(query, chunks, df, max_chunks=4): query_lower = query.lower() query_words = set(query_lower.split()) scored_chunks = [] for i, chunk in enumerate(chunks): chunk_lower = chunk.lower() score = sum(1 for word in query_words if word in chunk_lower) scored_chunks.append((score, i, chunk)) scored_chunks.sort(key=lambda x: x[0], reverse=True) top_chunks = [c[2] for c in scored_chunks[:max_chunks] if c[0] > 0] if not top_chunks: top_chunks = [c[2] for c in scored_chunks[:2]] return top_chunks def get_summary(df): rows, cols, miss = len(df), len(df.columns), int(df.isnull().sum().sum()) nums = df.select_dtypes(include=["number"]).columns.tolist() html = f"""
{rows}
जम्मा रेकर्ड
Total Records
{cols}
कुल कलम
Total Columns
{miss}
अपूर्ण डेटा
Missing Data
""" for col in df.columns: is_num = pd.api.types.is_numeric_dtype(df[col]) t = "💰 Number" if is_num else "📝 Text" m = df[col].isnull().sum() s = "✓ OK" if m == 0 else f"⚠ {int(m)} missing" cl = "#27ae60" if m == 0 else "#e74c3c" html += f'' html += "
Column Type Status
{col}{t}{s}
" if nums: html += '
' for col in nums[:3]: html += f'''
{col}
Total: {format_num(df[col].sum())} | Average: {format_num(df[col].mean())}
''' html += '
' return html + "
" def make_chart(df): nums = df.select_dtypes(include=["number"]).columns.tolist() if not nums: return None vals, labs = [], [] for col in nums[:5]: s = df[col].sum() if pd.notna(s) and s != 0: vals.append(float(s)) labs.append(str(col)[:12]) if not vals: return None fig = go.Figure([go.Bar( x=labs, y=vals, marker_color=['#667eea', '#f5576c', '#4facfe', '#43e97b', '#fa709a'] )]) fig.update_layout( title="जम्मा रकम तुलना | Total Comparison", height=350, template="plotly_white", xaxis_title="Columns", yaxis_title="Amount" ) return fig def get_preview(rows, start, search): global current_df if current_df is None: return pd.DataFrame() df = current_df.copy() if search and search.strip(): search_term = search.strip().lower() def row_matches(row): for cell in row: cell_str = str(cell) try: cell_roman = sanscript.transliterate( cell_str, sanscript.DEVANAGARI, sanscript.ITRANS ).lower() except: cell_roman = cell_str.lower() if search_term in cell_roman: return True return False mask = df.apply(row_matches, axis=1) df = df[mask] start = max(0, int(start)) rows = max(1, int(rows)) rows = min(rows, len(df) - start) if rows <= 0: return pd.DataFrame() return df.iloc[start:start + rows] # ========================================== # CORE FUNCTIONS # ========================================== def process_file(f): global current_df if f is None: return None, "❌ Please upload an Excel file", None try: current_df = pd.read_excel(f) return get_preview(20, 0, ""), get_summary(current_df), make_chart(current_df) except Exception as e: return None, f"❌ Error: {str(e)}", None def chat(msg, hist): global current_df if not msg or not msg.strip(): return "", hist # ✅ Gradio 6.x: dict format with role/content if current_df is None: hist.append({"role": "user", "content": msg}) hist.append({"role": "assistant", "content": "❌ Please upload an Excel file first!"}) return "", hist groq_client = get_client() if not groq_client: hist.append({"role": "user", "content": msg}) hist.append({"role": "assistant", "content": "❌ API key not configured. Please set GROQ_API_KEY in Space secrets."}) return "", hist try: all_chunks = df_to_chunks(current_df, chunk_size=50) relevant_chunks = find_relevant_chunks(msg, all_chunks, current_df, max_chunks=4) data_context = "\n\n".join(relevant_chunks) cols = list(current_df.columns) nums = current_df.select_dtypes(include=["number"]).columns.tolist() stats_lines = [] for col in nums: stats_lines.append( f"{col} → Sum: {current_df[col].sum():.2f}, " f"Mean: {current_df[col].mean():.2f}, " f"Max: {current_df[col].max():.2f}, " f"Min: {current_df[col].min():.2f}" ) stats_text = "\n".join(stats_lines) if stats_lines else "No numeric columns." system_prompt = """You are a smart data analyst assistant. You understand both English and Nepali. Answer using ONLY the provided data. Be specific and reference actual values. If the answer is not in the data, say "This information is not available in the uploaded Excel file." Respond in the same language as the user's question.""" user_message = f"""Excel Columns: {cols} Numeric Statistics: {stats_text} Data Sample: {data_context} Question: {msg} Answer based ONLY on the data above.""" response = groq_client.chat.completions.create( model="llama-3.3-70b-versatile", messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message} ], temperature=0.2, max_tokens=1024 ) reply = response.choices[0].message.content.strip() # ✅ Gradio 6.x dict message format hist.append({"role": "user", "content": msg}) hist.append({"role": "assistant", "content": f"🤖 {reply}"}) return "", hist except Exception as e: hist.append({"role": "user", "content": msg}) hist.append({"role": "assistant", "content": f"❌ Error: {str(e)}"}) return "", hist # ========================================== # GRADIO 6.x UI # ========================================== css = """ .gradio-container {font-family: 'Segoe UI', sans-serif;} """ # ✅ Gradio 6.x: title only in gr.Blocks(), theme/css go in launch() with gr.Blocks(title="Smart Excel Analyst") as demo: gr.Markdown("""

🤖 Smart Excel Analyst

नेपाली + English | Upload Excel → Analyze → Chat

""") gr.Markdown("### 📁 Step 1: Upload Your Excel File") with gr.Row(): with gr.Column(scale=3): file_input = gr.File( label="Choose .xlsx or .xls file", file_types=[".xlsx", ".xls"] ) with gr.Column(scale=1): analyze_btn = gr.Button("📊 Analyze Data", variant="primary", size="lg") gr.Markdown("### 🔍 Step 2: Explore Your Data") with gr.Row(): rows_slider = gr.Slider(minimum=1, maximum=100, value=20, step=1, label="Rows to Show") start_row = gr.Number(value=0, label="Start From Row", minimum=0, step=1) search_box = gr.Textbox( placeholder="Search... (Romanized Nepali supported e.g. 'rajendra')", label="🔍 Search" ) with gr.Row(): with gr.Column(scale=2): preview_table = gr.Dataframe(label="Data Preview", interactive=False) with gr.Column(scale=3): summary_html = gr.HTML() chart_plot = gr.Plot(label="📊 Visualization") gr.Markdown(""" ### 💬 Step 3: Ask Questions About Your Data **Examples | उदाहरणहरू:** - `What is the total Sales?` - `मासिक किस्ता को जम्मा कति हो?` - `Find details about Ram Bahadur` - `Who has the highest loan amount?` """) # ✅ Gradio 6.x: NO type argument - uses dict format by default chatbot = gr.Chatbot(height=450) with gr.Row(): with gr.Column(scale=4): msg_input = gr.Textbox( placeholder="Type your question in English or Nepali...", label="Your Question", lines=2 ) with gr.Column(scale=1): send_btn = gr.Button("Send 📨", variant="primary") gr.Markdown("""
🔒 Your data stays private | Powered by Groq AI
""") # Event Handlers analyze_btn.click(process_file, inputs=file_input, outputs=[preview_table, summary_html, chart_plot]) rows_slider.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table) start_row.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table) search_box.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table) send_btn.click(chat, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot]) msg_input.submit(chat, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot]) # ✅ Gradio 6.x: theme and css go in launch(), server_name for HF Spaces demo.launch( server_name="0.0.0.0", server_port=7860, theme=gr.themes.Soft(), css=css, ssr_mode=False )