# ==========================================
# Smart Excel Analyst - Hugging Face Space
# Nepali + English Excel Analytics with AI Chat
# ==========================================
import gradio as gr
import pandas as pd
import os
import plotly.graph_objects as go
from groq import Groq
from unidecode import unidecode
from indic_transliteration import sanscript
# ==========================================
# CONFIG
# ==========================================
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
if not GROQ_API_KEY:
GROQ_API_KEY = "your-groq-api-key-here"
current_df = None
client = None
def get_client():
global client
if client is None and GROQ_API_KEY and GROQ_API_KEY != "your-groq-api-key-here":
client = Groq(api_key=GROQ_API_KEY)
return client
# ==========================================
# HELPERS
# ==========================================
def format_num(n):
try:
if pd.isna(n):
return "—"
n = float(n)
if abs(n) >= 10_000_000:
return f"₹{n/10_000_000:.2f} करोड"
elif abs(n) >= 100_000:
return f"₹{n/100_000:.2f} लाख"
elif abs(n) >= 1_000:
return f"₹{n:,.0f}"
else:
return f"₹{n:,.2f}"
except:
return str(n)
def df_to_chunks(df, chunk_size=50):
chunks = []
total_rows = len(df)
for start in range(0, total_rows, chunk_size):
end = min(start + chunk_size, total_rows)
chunk_df = df.iloc[start:end]
rows_text = []
for idx, row in chunk_df.iterrows():
row_str = " | ".join([f"{col}: {row[col]}" for col in df.columns])
rows_text.append(f"Row {idx + 1}: {row_str}")
chunks.append("\n".join(rows_text))
return chunks
def find_relevant_chunks(query, chunks, df, max_chunks=4):
query_lower = query.lower()
query_words = set(query_lower.split())
scored_chunks = []
for i, chunk in enumerate(chunks):
chunk_lower = chunk.lower()
score = sum(1 for word in query_words if word in chunk_lower)
scored_chunks.append((score, i, chunk))
scored_chunks.sort(key=lambda x: x[0], reverse=True)
top_chunks = [c[2] for c in scored_chunks[:max_chunks] if c[0] > 0]
if not top_chunks:
top_chunks = [c[2] for c in scored_chunks[:2]]
return top_chunks
def get_summary(df):
rows, cols, miss = len(df), len(df.columns), int(df.isnull().sum().sum())
nums = df.select_dtypes(include=["number"]).columns.tolist()
html = f"""
{rows}
जम्मा रेकर्ड
Total Records
{cols}
कुल कलम
Total Columns
{miss}
अपूर्ण डेटा
Missing Data
| Column |
Type |
Status |
"""
for col in df.columns:
is_num = pd.api.types.is_numeric_dtype(df[col])
t = "💰 Number" if is_num else "📝 Text"
m = df[col].isnull().sum()
s = "✓ OK" if m == 0 else f"⚠ {int(m)} missing"
cl = "#27ae60" if m == 0 else "#e74c3c"
html += f'| {col} | {t} | {s} |
'
html += "
"
if nums:
html += '
'
for col in nums[:3]:
html += f'''
{col}
Total: {format_num(df[col].sum())} |
Average: {format_num(df[col].mean())}
'''
html += '
'
return html + "
"
def make_chart(df):
nums = df.select_dtypes(include=["number"]).columns.tolist()
if not nums:
return None
vals, labs = [], []
for col in nums[:5]:
s = df[col].sum()
if pd.notna(s) and s != 0:
vals.append(float(s))
labs.append(str(col)[:12])
if not vals:
return None
fig = go.Figure([go.Bar(
x=labs, y=vals,
marker_color=['#667eea', '#f5576c', '#4facfe', '#43e97b', '#fa709a']
)])
fig.update_layout(
title="जम्मा रकम तुलना | Total Comparison",
height=350, template="plotly_white",
xaxis_title="Columns", yaxis_title="Amount"
)
return fig
def get_preview(rows, start, search):
global current_df
if current_df is None:
return pd.DataFrame()
df = current_df.copy()
if search and search.strip():
search_term = search.strip().lower()
def row_matches(row):
for cell in row:
cell_str = str(cell)
try:
cell_roman = sanscript.transliterate(
cell_str, sanscript.DEVANAGARI, sanscript.ITRANS
).lower()
except:
cell_roman = cell_str.lower()
if search_term in cell_roman:
return True
return False
mask = df.apply(row_matches, axis=1)
df = df[mask]
start = max(0, int(start))
rows = max(1, int(rows))
rows = min(rows, len(df) - start)
if rows <= 0:
return pd.DataFrame()
return df.iloc[start:start + rows]
# ==========================================
# CORE FUNCTIONS
# ==========================================
def process_file(f):
global current_df
if f is None:
return None, "❌ Please upload an Excel file", None
try:
current_df = pd.read_excel(f)
return get_preview(20, 0, ""), get_summary(current_df), make_chart(current_df)
except Exception as e:
return None, f"❌ Error: {str(e)}", None
def chat(msg, hist):
global current_df
if not msg or not msg.strip():
return "", hist
# ✅ Gradio 6.x: dict format with role/content
if current_df is None:
hist.append({"role": "user", "content": msg})
hist.append({"role": "assistant", "content": "❌ Please upload an Excel file first!"})
return "", hist
groq_client = get_client()
if not groq_client:
hist.append({"role": "user", "content": msg})
hist.append({"role": "assistant", "content": "❌ API key not configured. Please set GROQ_API_KEY in Space secrets."})
return "", hist
try:
all_chunks = df_to_chunks(current_df, chunk_size=50)
relevant_chunks = find_relevant_chunks(msg, all_chunks, current_df, max_chunks=4)
data_context = "\n\n".join(relevant_chunks)
cols = list(current_df.columns)
nums = current_df.select_dtypes(include=["number"]).columns.tolist()
stats_lines = []
for col in nums:
stats_lines.append(
f"{col} → Sum: {current_df[col].sum():.2f}, "
f"Mean: {current_df[col].mean():.2f}, "
f"Max: {current_df[col].max():.2f}, "
f"Min: {current_df[col].min():.2f}"
)
stats_text = "\n".join(stats_lines) if stats_lines else "No numeric columns."
system_prompt = """You are a smart data analyst assistant. You understand both English and Nepali.
Answer using ONLY the provided data. Be specific and reference actual values.
If the answer is not in the data, say "This information is not available in the uploaded Excel file."
Respond in the same language as the user's question."""
user_message = f"""Excel Columns: {cols}
Numeric Statistics:
{stats_text}
Data Sample:
{data_context}
Question: {msg}
Answer based ONLY on the data above."""
response = groq_client.chat.completions.create(
model="llama-3.3-70b-versatile",
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message}
],
temperature=0.2,
max_tokens=1024
)
reply = response.choices[0].message.content.strip()
# ✅ Gradio 6.x dict message format
hist.append({"role": "user", "content": msg})
hist.append({"role": "assistant", "content": f"🤖 {reply}"})
return "", hist
except Exception as e:
hist.append({"role": "user", "content": msg})
hist.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
return "", hist
# ==========================================
# GRADIO 6.x UI
# ==========================================
css = """
.gradio-container {font-family: 'Segoe UI', sans-serif;}
"""
# ✅ Gradio 6.x: title only in gr.Blocks(), theme/css go in launch()
with gr.Blocks(title="Smart Excel Analyst") as demo:
gr.Markdown("""
🤖 Smart Excel Analyst
नेपाली + English | Upload Excel → Analyze → Chat
""")
gr.Markdown("### 📁 Step 1: Upload Your Excel File")
with gr.Row():
with gr.Column(scale=3):
file_input = gr.File(
label="Choose .xlsx or .xls file",
file_types=[".xlsx", ".xls"]
)
with gr.Column(scale=1):
analyze_btn = gr.Button("📊 Analyze Data", variant="primary", size="lg")
gr.Markdown("### 🔍 Step 2: Explore Your Data")
with gr.Row():
rows_slider = gr.Slider(minimum=1, maximum=100, value=20, step=1, label="Rows to Show")
start_row = gr.Number(value=0, label="Start From Row", minimum=0, step=1)
search_box = gr.Textbox(
placeholder="Search... (Romanized Nepali supported e.g. 'rajendra')",
label="🔍 Search"
)
with gr.Row():
with gr.Column(scale=2):
preview_table = gr.Dataframe(label="Data Preview", interactive=False)
with gr.Column(scale=3):
summary_html = gr.HTML()
chart_plot = gr.Plot(label="📊 Visualization")
gr.Markdown("""
### 💬 Step 3: Ask Questions About Your Data
**Examples | उदाहरणहरू:**
- `What is the total Sales?`
- `मासिक किस्ता को जम्मा कति हो?`
- `Find details about Ram Bahadur`
- `Who has the highest loan amount?`
""")
# ✅ Gradio 6.x: NO type argument - uses dict format by default
chatbot = gr.Chatbot(height=450)
with gr.Row():
with gr.Column(scale=4):
msg_input = gr.Textbox(
placeholder="Type your question in English or Nepali...",
label="Your Question",
lines=2
)
with gr.Column(scale=1):
send_btn = gr.Button("Send 📨", variant="primary")
gr.Markdown("""
🔒 Your data stays private | Powered by Groq AI
""")
# Event Handlers
analyze_btn.click(process_file, inputs=file_input, outputs=[preview_table, summary_html, chart_plot])
rows_slider.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table)
start_row.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table)
search_box.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table)
send_btn.click(chat, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot])
msg_input.submit(chat, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot])
# ✅ Gradio 6.x: theme and css go in launch(), server_name for HF Spaces
demo.launch(
server_name="0.0.0.0",
server_port=7860,
theme=gr.themes.Soft(),
css=css,
ssr_mode=False
)