import pandas as pd
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# -------------------------------
# 🔹 Load TinyLlama once (outside the function)
# -------------------------------
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load tokenizer and model; use fp16 on GPU, fall back to fp32 on CPU
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

# -------------------------------
# 🔹 Define the main function
# -------------------------------
def generate_insights(file):
    try:
        # 🔸 Resolve the uploaded file path. Depending on the Gradio
        # version, gr.File passes either a tempfile-like object with a
        # .name attribute or a plain filepath string.
        path = file.name if hasattr(file, "name") else file

        # 🔸 Load Excel file (requires the openpyxl package)
        df = pd.read_excel(path, engine="openpyxl")

        # 🔸 Summarize data: first 3 rows plus descriptive statistics
        # (to_markdown requires the tabulate package)
        table_text = df.head(3).to_markdown(index=False)
        summary = df.describe().to_markdown()

        # 🔸 Prepare prompt using TinyLlama's chat markers
        prompt = f"""<|user|>
You are a professional data analyst. Here is a sample of the dataset:

{table_text}

And here are the summary statistics:

{summary}

Give exactly 3 clear, concise bullet-point insights based on the data.
Focus only on notable patterns, trends, or anomalies.
<|assistant|>
"""

        # 🔸 Tokenize and generate
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

        # 🔸 Decode response and strip the echoed prompt if present
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if "<|assistant|>" in response:
            response = response.split("<|assistant|>")[-1].strip()

        return response

    except Exception as e:
        return f"❌ Error processing file: {str(e)}"

# -------------------------------
# 🔹 Gradio UI
# -------------------------------
gr.Interface(
    fn=generate_insights,
    inputs=gr.File(label="Upload Excel File (.xlsx)"),
    outputs=gr.Textbox(label="Generated Insights", lines=15),
    title="Insights Generator",
    description="Upload an Excel file and get data insights.",
).launch()
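
# -------------------------------
# 🔹 Optional: quick local smoke test, kept commented out so launch()
#    above stays the entry point. A minimal sketch — it assumes an
#    Excel file named "sample.xlsx" exists in the working directory
#    (a hypothetical filename; adjust to a real file before running),
#    and uses SimpleNamespace to mimic the .name attribute that older
#    Gradio versions put on uploaded files.
# -------------------------------
# from types import SimpleNamespace
# print(generate_insights(SimpleNamespace(name="sample.xlsx")))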