Barisha committed on
Commit
1f74418
·
verified ·
1 Parent(s): 76b2122

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# FIX: the original repo id "nvidia/OpenGPT-OSS-20B" does not exist on the
# Hugging Face Hub, so from_pretrained() would raise at startup. The
# GPT-OSS-20B model referenced by the UI title is published under the
# "openai" organization.
MODEL_NAME = "openai/gpt-oss-20b"

print("Loading model... this may take some time.")

# Tokenizer and model are loaded once at import time and reused for every
# chat request. device_map="auto" lets accelerate place the weights across
# available devices (GPU/CPU) automatically.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # Half precision only when a GPU is available; fp16 on CPU is slow or
    # unsupported, so fall back to fp32 there.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
15
+
16
def chat(prompt, history):
    """Generate one assistant reply for *prompt* given prior turns.

    *history* is a list of (user, bot) string pairs; it is mutated in
    place by appending the new turn, and returned alongside the reply so
    the caller can refresh the chatbot widget.
    """
    # Flatten the conversation into the plain "User:/Assistant:" transcript
    # format used as the model prompt.
    transcript = "".join(
        f"User: {past_user}\nAssistant: {past_bot}\n"
        for past_user, past_bot in history
    )
    transcript += f"User: {prompt}\nAssistant:"

    encoded = tokenizer(transcript, return_tensors="pt").to(model.device)
    generated = model.generate(
        **encoded,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # The decoded text echoes the whole prompt; keep only the text after
    # the final "Assistant:" marker.
    reply = decoded.split("Assistant:")[-1].strip()
    history.append((prompt, reply))
    return reply, history
35
+
36
+
37
# Build the Gradio UI: a chatbot view, a text input, and a clear button.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GPT-OSS-20B Chat (HuggingFace Space)")
    chatbot = gr.Chatbot()
    message = gr.Textbox(label="Type your message")
    clear = gr.Button("Clear Chat")

    def _on_message(user_text, turns):
        """Handle a submitted message: generate a reply, clear the textbox."""
        # chat() appends the new (user, bot) turn to `turns` itself; the
        # reply value is not needed here.
        _, turns = chat(user_text, turns)
        return "", turns

    # Submitting the textbox feeds (text, history) in and writes back the
    # cleared textbox and updated history.
    message.submit(_on_message, [message, chatbot], [message, chatbot])
    # Clearing resets the chatbot component to its empty state.
    clear.click(lambda: None, None, chatbot)

demo.launch()