Kompella Sri Aasrith Souri committed on
Commit
7a65ce3
·
1 Parent(s): dadb993

Add Gradio app with model

Browse files
Files changed (2) hide show
  1. app.py +49 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import gradio as gr
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+
6
+ # Load model and tokenizer
7
+ print("Loading model...")
8
+ device = "cuda" if torch.cuda.is_available() else "cpu"
9
+ tokenizer = AutoTokenizer.from_pretrained(".", trust_remote_code=True)
10
+ model = AutoModelForCausalLM.from_pretrained(
11
+ ".",
12
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
13
+ device_map=device,
14
+ trust_remote_code=True
15
+ )
16
+ print("✓ Model loaded!")
17
+
18
+ def chat(message, max_tokens, temperature):
19
+ """Generate response from model"""
20
+ inputs = tokenizer(message, return_tensors="pt")
21
+ inputs = {k: v.to(device) for k, v in inputs.items()}
22
+
23
+ with torch.no_grad():
24
+ outputs = model.generate(
25
+ **inputs,
26
+ max_new_tokens=max_tokens,
27
+ temperature=temperature,
28
+ top_p=0.9,
29
+ do_sample=True
30
+ )
31
+
32
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
33
+ return response
34
+
35
+ # Create Gradio interface
36
+ demo = gr.Interface(
37
+ fn=chat,
38
+ inputs=[
39
+ gr.Textbox(label="Message", placeholder="Ask me anything..."),
40
+ gr.Slider(minimum=10, maximum=1024, value=512, step=1, label="Max Tokens"),
41
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
42
+ ],
43
+ outputs=gr.Textbox(label="Response"),
44
+ title="Zenith Copilot",
45
+ description="Chat with your deployed model",
46
+ )
47
+
48
+ if __name__ == "__main__":
49
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio