Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Hugging Face Hub identifier of the chat checkpoint served by this app.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Tokenizer and model are loaded once at import time and shared by every
# request; the weights are explicitly requested in float32.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32)
def build_prompt(message, history):
    """Render the conversation as a single prompt string for the model.

    Every turn is written as a ``<|role|>`` tag line, the turn text, and a
    newline. The prompt ends with an open ``<|assistant|>`` tag so the model
    continues with the reply.

    Args:
        message: The new user message to be answered.
        history: Earlier turns, either as ``(user_msg, bot_msg)`` pairs or
            as message dicts carrying ``"role"`` and ``"content"`` keys
            (the shape Gradio passes with ``type="messages"``). May be
            empty or ``None``.

    Returns:
        The assembled prompt string.

    Raises:
        KeyError: If a message dict lacks ``"role"`` or ``"content"``.
    """
    parts = []
    for turn in history or ():
        if isinstance(turn, dict):
            # Dicts must be handled explicitly: unpacking a two-key dict
            # yields its *keys*, which would silently put the literal words
            # "role" and "content" into the prompt.
            parts.append(f"<|{turn['role']}|>\n{turn['content']}\n")
        else:
            user_msg, bot_msg = turn
            parts.append(f"<|user|>\n{user_msg}\n<|assistant|>\n{bot_msg}\n")
    parts.append(f"<|user|>\n{message}\n<|assistant|>\n")
    return "".join(parts)
def chat(message, history):
    """Generate the assistant's reply to ``message`` given the chat history.

    Args:
        message: The latest user message.
        history: Prior conversation turns, passed through unchanged to
            ``build_prompt``.

    Returns:
        The generated reply with surrounding whitespace stripped, or a
        fixed fallback sentence when the model produces no visible text.
    """
    max_prompt_tokens = 1024

    prompt = build_prompt(message, history)
    encoded = tokenizer(prompt, return_tensors="pt")

    # Keep only the most recent tokens when the prompt is over budget.
    # Truncating on the right (the tokenizer default) would cut off the
    # newest user message and the trailing assistant tag, so the model
    # would answer a stale turn. NOTE: when clipping happens, any leading
    # special token (e.g. BOS) is dropped together with the oldest turns.
    inputs = {
        name: tensor[:, -max_prompt_tokens:] for name, tensor in encoded.items()
    }
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Slice by token
    # count rather than by decoded-string length, which drifts whenever
    # decoding the prompt does not reproduce the same text prefix.
    new_tokens = outputs[0][prompt_len:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return answer or "I am here. Ask me something."
# Chat UI: Gradio calls chat(message, history) for each user submission.
demo = gr.ChatInterface(
    fn=chat,
    title="My Local LLM Chat",
    description="TinyLlama chatbot running locally without HF_TOKEN",
    # Starter prompts offered by the interface.
    examples=[
        "Hello",
        "Who are you?",
        "Explain pain in simple words",
        "Write a short Python code",
    ],
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()