maxdougly committed on
Commit
985b94e
·
verified ·
1 Parent(s): fc5bec5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -1
app.py CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  @spaces.GPU(duration=120) # Decorate the function for ZeroGPU
2
  def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, min_p):
3
  # Construct messages for the chat template
@@ -18,7 +30,7 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
18
  )
19
 
20
  # Ensure input_ids is extracted
21
- input_ids = inputs # Tensor provided directly
22
  print("Input IDs shape:", input_ids.shape)
23
 
24
  # Generate response
@@ -30,8 +42,27 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
30
  min_p=min_p,
31
  )
32
 
 
 
 
 
33
  # Decode and format the response
34
  response = tokenizer.decode(output[0], skip_special_tokens=True)
35
 
36
  # Yield the response
37
  yield response.split("assistant")[-1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["CUDA_VISIBLE_DEVICES"] = "" # Prevent CUDA initialization outside ZeroGPU
3
+
4
+ import spaces # Import spaces first
5
+ import gradio as gr
6
+ from peft import AutoPeftModelForCausalLM
7
+ from transformers import AutoTokenizer
8
+
9
# Load the LoRA adapter model and its tokenizer once at module import time,
# so every chat request reuses the same objects instead of re-downloading.
# NOTE(review): CUDA_VISIBLE_DEVICES is cleared above, so this load appears
# to stay on CPU until the @spaces.GPU-decorated handler runs — confirm on
# the ZeroGPU runtime.
model = AutoPeftModelForCausalLM.from_pretrained("eforse01/lora_model")
tokenizer = AutoTokenizer.from_pretrained("eforse01/lora_model")
12
+
13
  @spaces.GPU(duration=120) # Decorate the function for ZeroGPU
14
  def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, min_p):
15
  # Construct messages for the chat template
 
30
  )
31
 
32
  # Ensure input_ids is extracted
33
+ input_ids = inputs # Directly using tensor returned from apply_chat_template
34
  print("Input IDs shape:", input_ids.shape)
35
 
36
  # Generate response
 
42
  min_p=min_p,
43
  )
44
 
45
+ # Debug output
46
+ print("Generated Output Shape:", output.shape)
47
+ print("Generated Output:", output)
48
+
49
  # Decode and format the response
50
  response = tokenizer.decode(output[0], skip_special_tokens=True)
51
 
52
  # Yield the response
53
  yield response.split("assistant")[-1]
54
+
55
+
56
# Gradio chat UI: the `respond` generator drives a standard ChatInterface,
# with extra controls exposing the system prompt and the sampling knobs
# (max new tokens, temperature, min-p) that `respond` accepts.
_extra_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=1.5, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.99, step=0.01, label="Min-p"),
]

demo = gr.ChatInterface(respond, additional_inputs=_extra_controls)

if __name__ == "__main__":
    demo.launch()