dprat0821 committed on
Commit
025e6ce
·
verified ·
1 Parent(s): d395b9d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
# --- Model setup -------------------------------------------------------
# Load the DeepSeek 7B chat model and its tokenizer once at import time.
# fp16 weights + device_map="auto" let accelerate place the model on the
# available GPU(s) automatically.
model_id = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
9
+
10
# Inference function
def generate_response(prompt, temperature, top_p, max_tokens, repetition_penalty):
    """Generate a sampled completion for *prompt* with the given knobs.

    Args:
        prompt: Raw user text. NOTE(review): deepseek-llm-7b-chat is a chat
            model — using ``tokenizer.apply_chat_template`` would likely give
            better results than feeding raw text; confirm before changing.
        temperature: Sampling temperature (> 0; higher = more random).
        top_p: Nucleus-sampling probability mass in (0, 1].
        max_tokens: Maximum number of NEW tokens to generate.
        repetition_penalty: Values > 1.0 discourage repeated tokens.

    Returns:
        The generated continuation as a string (prompt is NOT echoed back).
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Inference only: no_grad avoids building an autograd graph and saves
    # a substantial amount of GPU memory during generation.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            max_new_tokens=max_tokens,
            repetition_penalty=repetition_penalty,
        )
    # Bug fix: the original decoded the whole sequence, so the UI reply
    # started with a copy of the prompt. generate() returns prompt tokens
    # followed by new tokens — slice off the prompt before decoding.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
22
+
23
# Gradio UI -------------------------------------------------------------
# Declare the input widgets up front, then wire them into one Interface.
ui_inputs = [
    gr.Textbox(label="Prompt", lines=6, placeholder="Ask something..."),
    gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
    gr.Slider(32, 2048, value=512, step=32, label="Max New Tokens"),
    gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty"),
]

iface = gr.Interface(
    fn=generate_response,
    inputs=ui_inputs,
    outputs="text",
    title="🧠 DeepSeek LLM Chat with Parameter Tuning",
    theme="soft",
)

# Launch the server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()