braindeck committed
Commit 7687458 · Parent: ffa9728

Update app.py to use fine-tuned model

Files changed (1): app.py (+10 -11)
app.py CHANGED

@@ -1,21 +1,20 @@
-
 import gradio as gr
-from vllm import LLM, SamplingParams
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 
-# Load the model
-llm = LLM(model="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", trust_remote_code=True)
+# Load the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("braindeck/text2text", trust_remote_code=True, subfolder="checkpoints/model")
+model = AutoModelForCausalLM.from_pretrained("braindeck/text2text", trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto", subfolder="checkpoints/model")
 
 def generate_response(prompt):
     """
     Generates a response from the model.
     """
-    sampling_params = SamplingParams(temperature=0.0, top_p=1.0, max_tokens=512)
-    outputs = llm.generate(prompt, sampling_params)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(**inputs, max_new_tokens=512)
 
-    # Extract the generated text
-    generated_text = ""
-    for output in outputs:
-        generated_text += output.outputs[0].text
+    # Decode the generated text
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     return generated_text
 
@@ -40,4 +39,4 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
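
Note on the new generation path: transformers' model.generate returns the prompt tokens followed by the completion, so decoding outputs[0] in full echoes the prompt back into the Gradio output, unlike the old vLLM path, which returned only the generated text. A minimal sketch of a version that strips the prompt first, assuming the same tokenizer and model objects loaded above (this helper is illustrative and not part of the commit):

def generate_response(prompt):
    """Return only the newly generated text, without echoing the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # do_sample defaults to False, so this is greedy decoding -- the same
    # behaviour as the old vLLM SamplingParams(temperature=0.0).
    outputs = model.generate(**inputs, max_new_tokens=512)
    # outputs[0] holds prompt tokens + new tokens; slice the prompt off.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

The committed version still runs; it just returns the prompt concatenated with the completion.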