Spaces:

sreejang
/

Excel_Analyze

Sleeping

App Files Files Community

Excel_Analyze / app.py

sreejang

Update app.py

1db0cf9 verified 25 days ago

raw

history blame contribute delete

13.6 kB

	# ==========================================
	# Smart Excel Analyst - Hugging Face Space
	# Nepali + English Excel Analytics with AI Chat
	# ==========================================

	import gradio as gr
	import pandas as pd
	import os
	import plotly.graph_objects as go
	from groq import Groq
	from unidecode import unidecode
	from indic_transliteration import sanscript

	# ==========================================
	# CONFIG
	# ==========================================
	GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
	if not GROQ_API_KEY:
	GROQ_API_KEY = "your-groq-api-key-here"

	current_df = None
	client = None

	def get_client():
	global client
	if client is None and GROQ_API_KEY and GROQ_API_KEY != "your-groq-api-key-here":
	client = Groq(api_key=GROQ_API_KEY)
	return client

	# ==========================================
	# HELPERS
	# ==========================================
	def format_num(n):
	try:
	if pd.isna(n):
	return "—"
	n = float(n)
	if abs(n) >= 10_000_000:
	return f"₹{n/10_000_000:.2f} करोड"
	elif abs(n) >= 100_000:
	return f"₹{n/100_000:.2f} लाख"
	elif abs(n) >= 1_000:
	return f"₹{n:,.0f}"
	else:
	return f"₹{n:,.2f}"
	except:
	return str(n)


	def df_to_chunks(df, chunk_size=50):
	chunks = []
	total_rows = len(df)
	for start in range(0, total_rows, chunk_size):
	end = min(start + chunk_size, total_rows)
	chunk_df = df.iloc[start:end]
	rows_text = []
	for idx, row in chunk_df.iterrows():
	row_str = " \| ".join([f"{col}: {row[col]}" for col in df.columns])
	rows_text.append(f"Row {idx + 1}: {row_str}")
	chunks.append("\n".join(rows_text))
	return chunks


	def find_relevant_chunks(query, chunks, df, max_chunks=4):
	query_lower = query.lower()
	query_words = set(query_lower.split())
	scored_chunks = []
	for i, chunk in enumerate(chunks):
	chunk_lower = chunk.lower()
	score = sum(1 for word in query_words if word in chunk_lower)
	scored_chunks.append((score, i, chunk))
	scored_chunks.sort(key=lambda x: x[0], reverse=True)
	top_chunks = [c[2] for c in scored_chunks[:max_chunks] if c[0] > 0]
	if not top_chunks:
	top_chunks = [c[2] for c in scored_chunks[:2]]
	return top_chunks


	def get_summary(df):
	rows, cols, miss = len(df), len(df.columns), int(df.isnull().sum().sum())
	nums = df.select_dtypes(include=["number"]).columns.tolist()

	html = f"""
	<div style="padding:15px; font-family:Arial, sans-serif;">
	<div style="display:grid; grid-template-columns:repeat(3, 1fr); gap:10px; margin-bottom:15px;">
	<div style="background:linear-gradient(135deg,#667eea,#764ba2); color:white; padding:15px; border-radius:10px; text-align:center;">
	<div style="font-size:28px; font-weight:bold;">{rows}</div>
	<div style="font-size:12px;">जम्मा रेकर्ड<br>Total Records</div>
	</div>
	<div style="background:linear-gradient(135deg,#f5576c,#f093fb); color:white; padding:15px; border-radius:10px; text-align:center;">
	<div style="font-size:28px; font-weight:bold;">{cols}</div>
	<div style="font-size:12px;">कुल कलम<br>Total Columns</div>
	</div>
	<div style="background:linear-gradient(135deg,#4facfe,#00f2fe); color:white; padding:15px; border-radius:10px; text-align:center;">
	<div style="font-size:28px; font-weight:bold;">{miss}</div>
	<div style="font-size:12px;">अपूर्ण डेटा<br>Missing Data</div>
	</div>
	</div>
	<table style="width:100%; font-size:13px; border-collapse:collapse;">
	<thead>
	<tr style="background:#34495e; color:white;">
	<th style="padding:10px; text-align:left;">Column</th>
	<th style="padding:10px; text-align:center;">Type</th>
	<th style="padding:10px; text-align:center;">Status</th>
	</tr>
	</thead>
	<tbody>
	"""
	for col in df.columns:
	is_num = pd.api.types.is_numeric_dtype(df[col])
	t = "💰 Number" if is_num else "📝 Text"
	m = df[col].isnull().sum()
	s = "✓ OK" if m == 0 else f"⚠ {int(m)} missing"
	cl = "#27ae60" if m == 0 else "#e74c3c"
	html += f'<tr style="border-bottom:1px solid #ecf0f1;"><td style="padding:10px; font-weight:500;">{col}</td><td style="padding:10px; text-align:center;">{t}</td><td style="padding:10px; text-align:center; color:{cl}; font-weight:600;">{s}</td></tr>'

	html += "</tbody></table>"

	if nums:
	html += '<div style="margin-top:15px;">'
	for col in nums[:3]:
	html += f'''
	<div style="background:#f8f9fa; padding:12px; margin-bottom:8px; border-radius:8px; border-left:4px solid #3498db;">
	<div style="font-weight:700; margin-bottom:4px; color:#2c3e50;">{col}</div>
	<div style="color:#555; font-size:13px;">
	Total: <b>{format_num(df[col].sum())}</b> \|
	Average: <b>{format_num(df[col].mean())}</b>
	</div>
	</div>'''
	html += '</div>'

	return html + "</div>"


	def make_chart(df):
	nums = df.select_dtypes(include=["number"]).columns.tolist()
	if not nums:
	return None
	vals, labs = [], []
	for col in nums[:5]:
	s = df[col].sum()
	if pd.notna(s) and s != 0:
	vals.append(float(s))
	labs.append(str(col)[:12])
	if not vals:
	return None
	fig = go.Figure([go.Bar(
	x=labs, y=vals,
	marker_color=['#667eea', '#f5576c', '#4facfe', '#43e97b', '#fa709a']
	)])
	fig.update_layout(
	title="जम्मा रकम तुलना \| Total Comparison",
	height=350, template="plotly_white",
	xaxis_title="Columns", yaxis_title="Amount"
	)
	return fig


	def get_preview(rows, start, search):
	global current_df
	if current_df is None:
	return pd.DataFrame()
	df = current_df.copy()
	if search and search.strip():
	search_term = search.strip().lower()
	def row_matches(row):
	for cell in row:
	cell_str = str(cell)
	try:
	cell_roman = sanscript.transliterate(
	cell_str, sanscript.DEVANAGARI, sanscript.ITRANS
	).lower()
	except:
	cell_roman = cell_str.lower()
	if search_term in cell_roman:
	return True
	return False
	mask = df.apply(row_matches, axis=1)
	df = df[mask]
	start = max(0, int(start))
	rows = max(1, int(rows))
	rows = min(rows, len(df) - start)
	if rows <= 0:
	return pd.DataFrame()
	return df.iloc[start:start + rows]


	# ==========================================
	# CORE FUNCTIONS
	# ==========================================
	def process_file(f):
	global current_df
	if f is None:
	return None, "❌ Please upload an Excel file", None
	try:
	current_df = pd.read_excel(f)
	return get_preview(20, 0, ""), get_summary(current_df), make_chart(current_df)
	except Exception as e:
	return None, f"❌ Error: {str(e)}", None


	def chat(msg, hist):
	global current_df

	if not msg or not msg.strip():
	return "", hist

	# ✅ Gradio 6.x: dict format with role/content
	if current_df is None:
	hist.append({"role": "user", "content": msg})
	hist.append({"role": "assistant", "content": "❌ Please upload an Excel file first!"})
	return "", hist

	groq_client = get_client()
	if not groq_client:
	hist.append({"role": "user", "content": msg})
	hist.append({"role": "assistant", "content": "❌ API key not configured. Please set GROQ_API_KEY in Space secrets."})
	return "", hist

	try:
	all_chunks = df_to_chunks(current_df, chunk_size=50)
	relevant_chunks = find_relevant_chunks(msg, all_chunks, current_df, max_chunks=4)
	data_context = "\n\n".join(relevant_chunks)

	cols = list(current_df.columns)
	nums = current_df.select_dtypes(include=["number"]).columns.tolist()
	stats_lines = []
	for col in nums:
	stats_lines.append(
	f"{col} → Sum: {current_df[col].sum():.2f}, "
	f"Mean: {current_df[col].mean():.2f}, "
	f"Max: {current_df[col].max():.2f}, "
	f"Min: {current_df[col].min():.2f}"
	)
	stats_text = "\n".join(stats_lines) if stats_lines else "No numeric columns."

	system_prompt = """You are a smart data analyst assistant. You understand both English and Nepali.
	Answer using ONLY the provided data. Be specific and reference actual values.
	If the answer is not in the data, say "This information is not available in the uploaded Excel file."
	Respond in the same language as the user's question."""

	user_message = f"""Excel Columns: {cols}

	Numeric Statistics:
	{stats_text}

	Data Sample:
	{data_context}

	Question: {msg}

	Answer based ONLY on the data above."""

	response = groq_client.chat.completions.create(
	model="llama-3.3-70b-versatile",
	messages=[
	{"role": "system", "content": system_prompt},
	{"role": "user", "content": user_message}
	],
	temperature=0.2,
	max_tokens=1024
	)

	reply = response.choices[0].message.content.strip()

	# ✅ Gradio 6.x dict message format
	hist.append({"role": "user", "content": msg})
	hist.append({"role": "assistant", "content": f"🤖 {reply}"})
	return "", hist

	except Exception as e:
	hist.append({"role": "user", "content": msg})
	hist.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
	return "", hist


	# ==========================================
	# GRADIO 6.x UI
	# ==========================================
	css = """
	.gradio-container {font-family: 'Segoe UI', sans-serif;}
	"""

	# ✅ Gradio 6.x: title only in gr.Blocks(), theme/css go in launch()
	with gr.Blocks(title="Smart Excel Analyst") as demo:

	gr.Markdown("""
	<div style="text-align:center; padding:25px; background:linear-gradient(135deg,#667eea 0%,#764ba2 100%); color:white; border-radius:15px; margin-bottom:20px;">
	<h1 style="margin:0; font-size:2.2em;">🤖 Smart Excel Analyst</h1>
	<p style="margin:10px 0 0 0; font-size:1.1em; opacity:0.95;">
	नेपाली + English \| Upload Excel → Analyze → Chat
	</p>
	</div>
	""")

	gr.Markdown("### 📁 Step 1: Upload Your Excel File")
	with gr.Row():
	with gr.Column(scale=3):
	file_input = gr.File(
	label="Choose .xlsx or .xls file",
	file_types=[".xlsx", ".xls"]
	)
	with gr.Column(scale=1):
	analyze_btn = gr.Button("📊 Analyze Data", variant="primary", size="lg")

	gr.Markdown("### 🔍 Step 2: Explore Your Data")
	with gr.Row():
	rows_slider = gr.Slider(minimum=1, maximum=100, value=20, step=1, label="Rows to Show")
	start_row = gr.Number(value=0, label="Start From Row", minimum=0, step=1)
	search_box = gr.Textbox(
	placeholder="Search... (Romanized Nepali supported e.g. 'rajendra')",
	label="🔍 Search"
	)

	with gr.Row():
	with gr.Column(scale=2):
	preview_table = gr.Dataframe(label="Data Preview", interactive=False)
	with gr.Column(scale=3):
	summary_html = gr.HTML()

	chart_plot = gr.Plot(label="📊 Visualization")

	gr.Markdown("""
	### 💬 Step 3: Ask Questions About Your Data

	Examples \| उदाहरणहरू:
	- `What is the total Sales?`
	- `मासिक किस्ता को जम्मा कति हो?`
	- `Find details about Ram Bahadur`
	- `Who has the highest loan amount?`
	""")

	# ✅ Gradio 6.x: NO type argument - uses dict format by default
	chatbot = gr.Chatbot(height=450)

	with gr.Row():
	with gr.Column(scale=4):
	msg_input = gr.Textbox(
	placeholder="Type your question in English or Nepali...",
	label="Your Question",
	lines=2
	)
	with gr.Column(scale=1):
	send_btn = gr.Button("Send 📨", variant="primary")

	gr.Markdown("""
	<div style="text-align:center; padding:15px; color:#7f8c8d; margin-top:20px; border-top:1px solid #ecf0f1; font-size:12px;">
	🔒 Your data stays private \| Powered by Groq AI
	</div>
	""")

	# Event Handlers
	analyze_btn.click(process_file, inputs=file_input, outputs=[preview_table, summary_html, chart_plot])
	rows_slider.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table)
	start_row.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table)
	search_box.change(get_preview, inputs=[rows_slider, start_row, search_box], outputs=preview_table)
	send_btn.click(chat, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot])
	msg_input.submit(chat, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot])

	# ✅ Gradio 6.x: theme and css go in launch(), server_name for HF Spaces
	demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	theme=gr.themes.Soft(),
	css=css,
	ssr_mode=False
	)