Burman-AI committed on
Commit
2aebd4f
·
verified ·
1 Parent(s): 471d755

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the instruction-tuned Llama 3.2 1B checkpoint from the Hugging Face Hub.
# float16 halves memory on GPU; CPU falls back to float32 because half-precision
# matmuls are slow/unsupported on most CPUs.
model_name = "unsloth/Llama-3.2-1b-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",  # let accelerate place the weights on the available device(s)
)

# NOTE(review): the original defined a module-level `chat_history = []` here.
# It was never read or written anywhere in the file (gr.ChatInterface passes
# history into the callback itself), so it has been removed as dead code.
def generate_response(message, history):
    """Generate the assistant's reply to *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The user's latest input.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns, supplied by gr.ChatInterface.

    Returns
    -------
    str
        Only the newly generated reply. gr.ChatInterface appends it to the
        history itself, so the callback must NOT return the history.
    """
    # Rebuild the whole conversation as one prompt string.
    # NOTE(review): these <|user|>/<|end|>/<|assistant|> markers are not
    # Llama 3.2's actual chat template (<|start_header_id|> etc.); consider
    # tokenizer.apply_chat_template(...) for better generations — TODO confirm.
    prompt = ""
    for user_turn, bot_turn in history:
        prompt += f"<|user|>{user_turn}<|end|><|assistant|>{bot_turn}<|end|>"
    prompt += f"<|user|>{message}<|end|><|assistant|>"

    # Tokenize on the model's device and sample a continuation.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
    )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text contains the entire prompt as well; keep only the text
    # after the final assistant marker, i.e. the newly generated reply.
    response = result.split("<|assistant|>")[-1].strip()

    # Bug fix: the original did `history.append(...)` and returned
    # `(response, history)`. gr.ChatInterface manages the history itself and
    # expects a plain string back; returning a tuple made the UI render the
    # tuple instead of the reply.
    return response
# Gradio UI: wire the generation callback into a chat interface and serve it.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Llama 3.2 Chatbot",
    chatbot=gr.Chatbot(),
    textbox=gr.Textbox(placeholder="Ask me anything...", lines=2),
    clear_btn="Clear",
)

demo.launch()