"""Gradio chat UI for TinyLlama that keeps per-session history in a collection.

The history store is ``db.chat_history_collection``; this module only relies on
its ``find(...).sort(...).limit(...)`` and ``insert_one(...)`` methods.
"""

import logging
from datetime import datetime, timezone

import gradio as gr
import torch
from transformers import AutoTokenizer, pipeline

from db import chat_history_collection

logger = logging.getLogger(__name__)

# Load Model & Tokenizer
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
pipe = pipeline(
    "text-generation",
    model=MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Marker that precedes the assistant's turn in the rendered prompt.
_ASSISTANT_TAG = "<|assistant|>"
# System instruction placed at the start of every conversation.
_SYSTEM_PROMPT = "You are a friendly chatbot."
# Number of most recent stored exchanges replayed as context.
_HISTORY_LIMIT = 10


def clean_response(response_text):
    """Return the text that follows the last ``<|assistant|>`` tag, stripped.

    If the tag does not occur, the whole input is returned with surrounding
    whitespace removed.

    Args:
        response_text: Raw text produced by the model (with or without the
            prompt in front of it).

    Returns:
        The cleaned reply string.
    """
    # rpartition yields ('', '', text) when the tag is absent, so the last
    # element is always the part we want.
    return response_text.rpartition(_ASSISTANT_TAG)[2].strip()


def _build_messages(past_chats, user_message):
    """Build the chat-template message list from stored turns plus the new message."""
    messages = [{"role": "system", "content": _SYSTEM_PROMPT}]
    # past_chats arrives newest-first; replay it oldest-first.
    for chat in reversed(past_chats):
        messages.append({"role": "user", "content": chat["message"]})
        messages.append({"role": "assistant", "content": chat["response"]})
    messages.append({"role": "user", "content": user_message})
    return messages


def chatbot_response(user_message, session_id="default_session", user_id="user_123"):
    """Generate a chatbot reply using the session's recent history as context.

    The exchange (message, reply, timestamp) is stored in
    ``chat_history_collection`` after a successful generation.

    Args:
        user_message: Text typed by the user.
        session_id: Key used to look up and store the conversation history.
        user_id: Identifier stored alongside the exchange.

    Returns:
        The assistant's reply; an empty string when ``user_message`` is blank;
        or ``"Error: <details>"`` if anything fails (the exception is logged).
    """
    # Nothing to answer: skip the database and the model entirely so blank
    # submissions are never stored as turns.
    if not user_message or not user_message.strip():
        return ""

    try:
        # Fetch the most recent exchanges for this session (newest first).
        past_chats = list(
            chat_history_collection.find({"session_id": session_id})
            .sort("timestamp", -1)
            .limit(_HISTORY_LIMIT)
        )

        messages = _build_messages(past_chats, user_message)

        # Render the conversation into the model's prompt format.
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # return_full_text=False asks the pipeline for the completion only,
        # so the reply does not depend on re-splitting the echoed prompt.
        outputs = pipe(
            prompt,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            return_full_text=False,
        )
        raw_response = outputs[0]["generated_text"]

        # Still run the cleaner in case the text contains an assistant tag.
        cleaned_response = clean_response(raw_response)

        # Save to database
        chat_data = {
            "session_id": session_id,
            "user_id": user_id,
            "message": user_message,
            "response": cleaned_response,
            # Timezone-aware UTC; datetime.utcnow() is deprecated since 3.12.
            "timestamp": datetime.now(timezone.utc),
        }
        chat_history_collection.insert_one(chat_data)

        return cleaned_response

    except Exception as e:
        # UI boundary: keep returning a readable message, but record the
        # traceback so failures are not silent.
        logger.exception("chatbot_response failed for session %s", session_id)
        return f"Error: {e}"


# Gradio UI
# NOTE: live mode is intentionally not enabled. With live=True the handler
# reruns on every input change, which would generate and store a reply for
# each partially typed message.
iface = gr.Interface(
    fn=chatbot_response,
    inputs=gr.Textbox(label="User Message"),
    outputs=gr.Textbox(label="Chatbot Response"),
    title="TinyLlama Chatbot",
    description="Chat with an AI-powered assistant.",
)

# Run Gradio app
if __name__ == "__main__":
    # share=True requests a public share link in addition to the local server.
    iface.launch(share=True)