# app.py — Excel insights generator (TinyLlama + Gradio)
# Origin: pradipraut737 — "Update app.py", commit de75819 (verified)
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
# -------------------------------
# 🔹 Load TinyLlama once (outside the function)
# -------------------------------
# Loading at module import time means the (slow) download/weight-load happens
# once per process, not once per request.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
# fp16 only when a GPU is present; fp16 on CPU is slow/unsupported for many ops.
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
# Let accelerate place the weights (GPU if available, else CPU).
device_map="auto"
)
# -------------------------------
# ๐Ÿ”น Define the main function
# -------------------------------
def generate_insights(file):
    """Read an uploaded Excel file and ask TinyLlama for 3 bullet-point insights.

    Args:
        file: The Gradio upload. Depending on the Gradio version / component
            config this is either a filepath string (``type="filepath"``, the
            Gradio 4 default) or a tempfile-like object exposing ``.name``.
            May be ``None`` when the user submits without uploading.

    Returns:
        str: The model's bullet-point insights, or a human-readable
        error message prefixed with ❌ (the UI shows whatever we return).
    """
    # Guard: submitting with no file would otherwise raise AttributeError.
    if file is None:
        return "❌ Please upload an .xlsx file first."
    try:
        # Gradio may hand us a plain path string or an object with .name —
        # accept both so the app works across Gradio versions.
        path = file if isinstance(file, str) else file.name

        # 🔸 Load Excel file
        df = pd.read_excel(path, engine="openpyxl")
        if df.empty:
            return "❌ The uploaded file contains no data rows."

        # 🔸 Summarize data: a 3-row sample plus summary statistics keeps the
        # prompt short enough for a 1.1B model's context.
        table_text = df.head(3).to_markdown(index=False)
        summary = df.describe().to_markdown()

        # 🔸 Prepare prompt for TinyLlama (its chat template uses
        # <|user|> / <|assistant|> role markers).
        prompt = f"""<|user|>
You are a professional data analyst. Here is a sample of the dataset:
{table_text}
And here are the summary statistics:
{summary}
Give exactly 3 clear, concise bullet-point insights based on the data. Focus only on notable patterns, trends, or anomalies.
<|assistant|>
"""
        # 🔸 Tokenize and generate
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # TinyLlama has no dedicated pad token; reuse EOS to silence warnings.
            pad_token_id=tokenizer.eos_token_id,
        )

        # 🔸 Decode response; the decoded text still contains the prompt,
        # so keep only what follows the assistant marker.
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if "<|assistant|>" in response:
            response = response.split("<|assistant|>")[-1].strip()
        return response
    except Exception as e:
        # UI boundary: surface the error as text rather than crashing the app.
        return f"❌ Error processing file: {str(e)}"
# -------------------------------
# 🔹 Gradio UI
# -------------------------------
# A single-function interface: file in, text out. Bind it to a name so the
# app object is inspectable/reusable before launching the server.
demo = gr.Interface(
    fn=generate_insights,
    inputs=gr.File(label="Upload Excel File (.xlsx)"),
    outputs=gr.Textbox(label="Generated Insights", lines=15),
    title="Insights Generator",
    description="Upload an Excel file and get data insights.",
)
demo.launch()