Spaces:
Sleeping
Sleeping
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
| import re | |
| from db import get_user_expenses | |
| tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") | |
| model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") | |
| # GPT-2 variants do not define a pad token by default. | |
| if tokenizer.pad_token is None: | |
| tokenizer.pad_token = tokenizer.eos_token | |
| if model.config.pad_token_id is None: | |
| model.config.pad_token_id = tokenizer.eos_token_id | |
| def _clean_text(text): | |
| cleaned = re.sub(r"\s+", " ", text).strip() | |
| cleaned = re.sub(r"(.)\1{5,}", r"\1\1", cleaned) | |
| return cleaned | |
| def format_expense_data(data): | |
| if not data: | |
| return "No expense data available." | |
| total = sum([d["amount"] for d in data]) | |
| category_summary = {} | |
| for d in data: | |
| cat = d.get("category", "other") | |
| category_summary[cat] = category_summary.get(cat, 0) + d["amount"] | |
| text = f"Total spending: {total}\n" | |
| for cat, amt in category_summary.items(): | |
| text += f"{cat}: {amt}\n" | |
| return text | |
| def chat_response(query, access_token): | |
| user_data = get_user_expenses(access_token) | |
| context = format_expense_data(user_data) | |
| prompt = f""" | |
| You are a smart financial advisor. | |
| User financial data: | |
| {context} | |
| User question: | |
| {query} | |
| Instructions: | |
| - Use the data to give advice | |
| - Keep answers short and practical | |
| - Do NOT generate unrelated content | |
| - If user asks for a number, include the exact number from the provided data when possible | |
| Answer: | |
| """ | |
| input_ids = tokenizer.encode(prompt + tokenizer.eos_token, return_tensors='pt') | |
| with torch.no_grad(): | |
| output = model.generate( | |
| input_ids, | |
| max_new_tokens=64, | |
| pad_token_id=tokenizer.eos_token_id, | |
| do_sample=True, | |
| temperature=0.3, | |
| top_p=0.9, | |
| no_repeat_ngram_size=3, | |
| repetition_penalty=1.2, | |
| ) | |
| response = tokenizer.decode( | |
| output[:, input_ids.shape[-1]:][0], | |
| skip_special_tokens=True | |
| ) | |
| response = response.split("Answer:")[-1].strip() | |
| response = response.split("\n")[0] | |
| response = _clean_text(response) | |
| # If model output is empty/noisy, fall back to a concise financial guidance line. | |
| if len(response) < 8: | |
| return "Focus on your top spending category and set a weekly cap to reduce non-essential expenses." | |
| return response |