Spaces:
Sleeping
Sleeping
| # ========================================== | |
| # Smart Excel Analyst - Hugging Face Space | |
| # Nepali + English Excel Analytics with AI Chat | |
| # ========================================== | |
| import gradio as gr | |
| import pandas as pd | |
| import os | |
| import plotly.graph_objects as go | |
| from groq import Groq | |
| from unidecode import unidecode | |
| from indic_transliteration import sanscript | |
| # ========================================== | |
| # CONFIG | |
| # ========================================== | |
| GROQ_API_KEY = os.getenv("GROQ_API_KEY", "") | |
| if not GROQ_API_KEY: | |
| GROQ_API_KEY = "your-groq-api-key-here" | |
| current_df = None | |
| client = None | |
| def get_client(): | |
| global client | |
| if client is None and GROQ_API_KEY and GROQ_API_KEY != "your-groq-api-key-here": | |
| client = Groq(api_key=GROQ_API_KEY) | |
| return client | |
| # ========================================== | |
| # HELPERS | |
| # ========================================== | |
| def format_num(n): | |
| try: | |
| if pd.isna(n): | |
| return "—" | |
| n = float(n) | |
| if abs(n) >= 10_000_000: | |
| return f"₹{n/10_000_000:.2f} करोड" | |
| elif abs(n) >= 100_000: | |
| return f"₹{n/100_000:.2f} लाख" | |
| elif abs(n) >= 1_000: | |
| return f"₹{n:,.0f}" | |
| else: | |
| return f"₹{n:,.2f}" | |
| except: | |
| return str(n) | |
| def df_to_chunks(df, chunk_size=50): | |
| chunks = [] | |
| total_rows = len(df) | |
| for start in range(0, total_rows, chunk_size): | |
| end = min(start + chunk_size, total_rows) | |
| chunk_df = df.iloc[start:end] | |
| rows_text = [] | |
| for idx, row in chunk_df.iterrows(): | |
| row_str = " | ".join([f"{col}: {row[col]}" for col in df.columns]) | |
| rows_text.append(f"Row {idx + 1}: {row_str}") | |
| chunks.append("\n".join(rows_text)) | |
| return chunks | |
| def find_relevant_chunks(query, chunks, df, max_chunks=4): | |
| query_lower = query.lower() | |
| query_words = set(query_lower.split()) | |
| scored_chunks = [] | |
| for i, chunk in enumerate(chunks): | |
| chunk_lower = chunk.lower() | |
| score = sum(1 for word in query_words if word in chunk_lower) | |
| scored_chunks.append((score, i, chunk)) | |
| scored_chunks.sort(key=lambda x: x[0], reverse=True) | |
| top_chunks = [c[2] for c in scored_chunks[:max_chunks] if c[0] > 0] | |
| if not top_chunks: | |
| top_chunks = [c[2] for c in scored_chunks[:2]] | |
| return top_chunks | |
| def get_summary(df): | |
| rows, cols, miss = len(df), len(df.columns), int(df.isnull().sum().sum()) | |
| nums = df.select_dtypes(include=["number"]).columns.tolist() | |
| html = f""" | |
| <div style="padding:15px; font-family:Arial, sans-serif;"> | |
| <div style="display:grid; grid-template-columns:repeat(3, 1fr); gap:10px; margin-bottom:15px;"> | |
| <div style="background:linear-gradient(135deg,#667eea,#764ba2); color:white; padding:15px; border-radius:10px; text-align:center;"> | |
| <div style="font-size:28px; font-weight:bold;">{rows}</div> | |
| <div style="font-size:12px;">जम्मा रेकर्ड<br>Total Records</div> | |
| </div> | |
| <div style="background:linear-gradient(135deg,#f5576c,#f093fb); color:white; padding:15px; border-radius:10px; text-align:center;"> | |
| <div style="font-size:28px; font-weight:bold;">{cols}</div> | |
| <div style="font-size:12px;">कुल कलम<br>Total Columns</div> | |
| </div> | |
| <div style="background:linear-gradient(135deg,#4facfe,#00f2fe); color:white; padding:15px; border-radius:10px; text-align:center;"> | |
| <div style="font-size:28px; font-weight:bold;">{miss}</div> | |
| <div style="font-size:12px;">अपूर्ण डेटा<br>Missing Data</div> | |
| </div> | |
| </div> | |
| <table style="width:100%; font-size:13px; border-collapse:collapse;"> | |
| <thead> | |
| <tr style="background:#34495e; color:white;"> | |
| <th style="padding:10px; text-align:left;">Column</th> | |
| <th style="padding:10px; text-align:center;">Type</th> | |
| <th style="padding:10px; text-align:center;">Status</th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| """ | |
| for col in df.columns: | |
| is_num = pd.api.types.is_numeric_dtype(df[col]) | |
| t = "💰 Number" if is_num else "📝 Text" | |
| m = df[col].isnull().sum() | |
| s = "✓ OK" if m == 0 else f"⚠ {int(m)} missing" | |
| cl = "#27ae60" if m == 0 else "#e74c3c" | |
| html += f'<tr style="border-bottom:1px solid #ecf0f1;"><td style="padding:10px; font-weight:500;">{col}</td><td style="padding:10px; text-align:center;">{t}</td><td style="padding:10px; text-align:center; color:{cl}; font-weight:600;">{s}</td></tr>' | |
| html += "</tbody></table>" | |
| if nums: | |
| html += '<div style="margin-top:15px;">' | |
| for col in nums[:3]: | |
| html += f''' | |
| <div style="background:#f8f9fa; padding:12px; margin-bottom:8px; border-radius:8px; border-left:4px solid #3498db;"> | |
| <div style="font-weight:700; margin-bottom:4px; color:#2c3e50;">{col}</div> | |
| <div style="color:#555; font-size:13px;"> | |
| Total: <b>{format_num(df[col].sum())}</b> | | |
| Average: <b>{format_num(df[col].mean())}</b> | |
| </div> | |
| </div>''' | |
| html += '</div>' | |
| return html + "</div>" | |
| def make_chart(df): | |
| nums = df.select_dtypes(include=["number"]).columns.tolist() | |
| if not nums: | |
| return None | |
| vals, labs = [], [] | |
| for col in nums[:5]: | |
| s = df[col].sum() | |
| if pd.notna(s) and s != 0: | |
| vals.append(float(s)) | |
| labs.append(str(col)[:12]) | |
| if not vals: | |
| return None | |
| fig = go.Figure([go.Bar( | |
| x=labs, y=vals, | |
| marker_color=['#667eea', '#f5576c', '#4facfe', '#43e97b', '#fa709a'] | |
| )]) | |
| fig.update_layout( | |
| title="जम्मा रकम तुलना | Total Comparison", | |
| height=350, template="plotly_white", | |
| xaxis_title="Columns", yaxis_title="Amount" | |
| ) | |
| return fig | |
| def get_preview(rows, start, search): | |
| global current_df | |
| if current_df is None: | |
| return pd.DataFrame() | |
| df = current_df.copy() | |
| if search and search.strip(): | |
| search_term = search.strip().lower() | |
| def row_matches(row): | |
| for cell in row: | |
| cell_str = str(cell) | |
| try: | |
| cell_roman = sanscript.transliterate( | |
| cell_str, sanscript.DEVANAGARI, sanscript.ITRANS | |
| ).lower() | |
| except: | |
| cell_roman = cell_str.lower() | |
| if search_term in cell_roman: | |
| return True | |
| return False | |
| mask = df.apply(row_matches, axis=1) | |
| df = df[mask] | |
| start = max(0, int(start)) | |
| rows = max(1, int(rows)) | |
| rows = min(rows, len(df) - start) | |
| if rows <= 0: | |
| return pd.DataFrame() | |
| return df.iloc[start:start + rows] | |
| # ========================================== | |
| # CORE FUNCTIONS | |
| # ========================================== | |
| def process_file(f): | |
| global current_df | |
| if f is None: | |
| return None, "❌ Please upload an Excel file", None | |
| try: | |
| current_df = pd.read_excel(f) | |
| return get_preview(20, 0, ""), get_summary(current_df), make_chart(current_df) | |
| except Exception as e: | |
| return None, f"❌ Error: {str(e)}", None | |
| def chat(msg, hist): | |
| global current_df | |
| if not msg or not msg.strip(): | |
| return "", hist | |
| # ✅ Gradio 6.x: dict format with role/content | |
| if current_df is None: | |
| hist.append({"role": "user", "content": msg}) | |
| hist.append({"role": "assistant", "content": "❌ Please upload an Excel file first!"}) | |
| return "", hist | |
| groq_client = get_client() | |
| if not groq_client: | |
| hist.append({"role": "user", "content": msg}) | |
| hist.append({"role": "assistant", "content": "❌ API key not configured. Please set GROQ_API_KEY in Space secrets."}) | |
| return "", hist | |
| try: | |
| all_chunks = df_to_chunks(current_df, chunk_size=50) | |
| relevant_chunks = find_relevant_chunks(msg, all_chunks, current_df, max_chunks=4) | |
| data_context = "\n\n".join(relevant_chunks) | |
| cols = list(current_df.columns) | |
| nums = current_df.select_dtypes(include=["number"]).columns.tolist() | |
| stats_lines = [] | |
| for col in nums: | |
| stats_lines.append( | |
| f"{col} → Sum: {current_df[col].sum():.2f}, " | |
| f"Mean: {current_df[col].mean():.2f}, " | |
| f"Max: {current_df[col].max():.2f}, " | |
| f"Min: {current_df[col].min():.2f}" | |
| ) | |
| stats_text = "\n".join(stats_lines) if stats_lines else "No numeric columns." | |
| system_prompt = """You are a smart data analyst assistant. You understand both English and Nepali. | |
| Answer using ONLY the provided data. Be specific and reference actual values. | |
| If the answer is not in the data, say "This information is not available in the uploaded Excel file." | |
| Respond in the same language as the user's question.""" | |
| user_message = f"""Excel Columns: {cols} | |
| Numeric Statistics: | |
| {stats_text} | |
| Data Sample: | |
| {data_context} | |
| Question: {msg} | |
| Answer based ONLY on the data above.""" | |
| response = groq_client.chat.completions.create( | |
| model="llama-3.3-70b-versatile", | |
| messages=[ | |
| {"role": "system", "content": system_prompt}, | |
| {"role": "user", "content": user_message} | |
| ], | |
| temperature=0.2, | |
| max_tokens=1024 | |
| ) | |
| reply = response.choices[0].message.content.strip() | |
| # ✅ Gradio 6.x dict message format | |
| hist.append({"role": "user", "content": msg}) | |
| hist.append({"role": "assistant", "content": f"🤖 {reply}"}) | |
| return "", hist | |
| except Exception as e: | |
| hist.append({"role": "user", "content": msg}) | |
| hist.append({"role": "assistant", "content": f"❌ Error: {str(e)}"}) | |
| return "", hist | |
| # ========================================== | |
| # GRADIO 6.x UI | |
| # ========================================== | |
| css = """ | |
| .gradio-container {font-family: 'Segoe UI', sans-serif;} | |
| """ | |
| # ✅ Gradio 6.x: title only in gr.Blocks(), theme/css go in launch() | |
| with gr.Blocks(title="Smart Excel Analyst") as demo: | |
| gr.Markdown(""" | |
| <div style="text-align:center; padding:25px; background:linear-gradient(135deg,#667eea 0%,#764ba2 100%); color:white; border-radius:15px; margin-bottom:20px;"> | |
| <h1 style="margin:0; font-size:2.2em;">🤖 Smart Excel Analyst</h1> | |
| <p style="margin:10px 0 0 0; font-size:1.1em; opacity:0.95;"> | |
| नेपाली + English | Upload Excel → Analyze → Chat | |
| </p> | |
| </div> | |
| """) | |
| gr.Markdown("### 📁 Step 1: Upload Your Excel File") | |
| with gr.Row(): | |
| with gr.Column(scale=3): | |
| file_input = gr.File( | |
| label="Choose .xlsx or .xls file", | |
| file_types=[".xlsx", ".xls"] | |
| ) | |
| with gr.Column(scale=1): | |
| analyze_btn = gr.Button("📊 Analyze Data", variant="primary", size="lg") | |
| gr.Markdown("### 🔍 Step 2: Explore Your Data") | |
| with gr.Row(): | |
| rows_slider = gr.Slider(minimum=1, maximum=100, value=20, step=1, label="Rows to Show") | |
| start_row = gr.Number(value=0, label="Start From Row", minimum=0, step=1) | |
| search_box = gr.Textbox( | |
| placeholder="Search... (Romanized Nepali supported e.g. 'rajendra')", | |
| label="🔍 Search" | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| preview_table = gr.Dataframe(label="Data Preview", interactive=False) | |
| with gr.Column(scale=3): | |
| summary_html = gr.HTML() | |
| chart_plot = gr.Plot(label="📊 Visualization") | |
| gr.Markdown(""" | |
| ### 💬 Step 3: Ask Questions About Your Data | |
| **Examples | उदाहरणहरू:** | |
| - `What is the total Sales?` | |
| - `मासिक किस्ता को जम्मा कति हो?` | |
| - `Find details about Ram Bahadur` | |
| - `Who has the highest loan amount?` | |
| """) | |
| # ✅ Gradio 6.x: NO type argument - uses dict format by default | |
| chatbot = gr.Chatbot(height=450) | |
| with gr.Row(): | |
| with gr.Column(scale=4): | |
| msg_input = gr.Textbox( | |
| placeholder="Type your question in English or Nepali...", | |
| label="Your Question", | |
| lines=2 | |
| ) | |
| with gr.Column(scale=1): | |
| send_btn = gr.Button("Send 📨", variant="primary") | |
| gr.Markdown(""" | |
| <div style="text-align:center; padding:15px; color:#7f8c8d; margin-top:20px; border-top:1px solid #ecf0f1; font-size:12px;"> | |
| 🔒 Your data stays private | Powered by Groq AI | |
| </div> | |
| """) | |
| # Event Handlers | |
| analyze_btn.click(process_file, inputs=file_input, outputs=[preview_table, summary_html, chart_plot]) | |
| rows_slider.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table) | |
| start_row.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table) | |
| search_box.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table) | |
| send_btn.click(chat, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot]) | |
| msg_input.submit(chat, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot]) | |
| # ✅ Gradio 6.x: theme and css go in launch(), server_name for HF Spaces | |
| demo.launch( | |
| server_name="0.0.0.0", | |
| server_port=7860, | |
| theme=gr.themes.Soft(), | |
| css=css, | |
| ssr_mode=False | |
| ) |