krish10 committed on
Commit
3445414
·
verified ·
1 Parent(s): 3c83c25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -4,7 +4,7 @@ from transformers import pipeline, TextIteratorStreamer
4
  import torch
5
  import threading
6
 
7
- # Load model pipeline
8
  model_name = "krish10/Qwen3_0.6B_16bit_TA_screen"
9
  pipe = pipeline("text-generation", model=model_name, device=0)
10
  tokenizer = pipe.tokenizer
@@ -15,7 +15,7 @@ MAX_TOKENS = 3000
15
  TEMPERATURE = 0.1
16
  TOP_P = 0.9
17
 
18
- # Response function using a streamer
19
  @spaces.GPU
20
  def respond_stream(user_input):
21
  messages = [{"role": "user", "content": user_input}]
@@ -37,30 +37,31 @@ def respond_stream(user_input):
37
  thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
38
  thread.start()
39
 
40
- # Yield tokens as they come
41
- for token in streamer:
42
- yield token
 
43
 
44
- # Gradio UI with streaming
45
  with gr.Blocks() as demo:
46
- gr.Markdown("## 🤖 Qwen Chat with Streaming (Fixed Temp/Top-p/Max Tokens)")
47
 
48
  input_box = gr.Textbox(
49
  lines=12,
50
  label="Your input (instruction + abstract)",
51
  placeholder="Instruction: Population = ...\nAbstract: ..."
52
  )
53
- output_box = gr.Textbox(lines=12, label="Model Response")
 
 
54
  generate_btn = gr.Button("Generate")
55
 
56
  generate_btn.click(
57
  fn=respond_stream,
58
  inputs=[input_box],
59
- outputs=[output_box],
60
- api_name="chat",
61
- stream=True
62
  )
63
 
64
- # Launch app
65
  if __name__ == "__main__":
66
  demo.launch()
 
4
  import torch
5
  import threading
6
 
7
+ # Load pipeline and components
8
  model_name = "krish10/Qwen3_0.6B_16bit_TA_screen"
9
  pipe = pipeline("text-generation", model=model_name, device=0)
10
  tokenizer = pipe.tokenizer
 
15
  TEMPERATURE = 0.1
16
  TOP_P = 0.9
17
 
18
+ # Generator function for streaming
19
  @spaces.GPU
20
  def respond_stream(user_input):
21
  messages = [{"role": "user", "content": user_input}]
 
37
  thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
38
  thread.start()
39
 
40
+ partial_text = ""
41
+ for new_token in streamer:
42
+ partial_text += new_token
43
+ yield partial_text
44
 
45
+ # Gradio interface with live streaming (no stream=True)
46
  with gr.Blocks() as demo:
47
+ gr.Markdown("## 🤖 Qwen Streaming Chat (Fixed Parameters)")
48
 
49
  input_box = gr.Textbox(
50
  lines=12,
51
  label="Your input (instruction + abstract)",
52
  placeholder="Instruction: Population = ...\nAbstract: ..."
53
  )
54
+
55
+ output_box = gr.Textbox(lines=12, label="Model Response", interactive=False)
56
+
57
  generate_btn = gr.Button("Generate")
58
 
59
  generate_btn.click(
60
  fn=respond_stream,
61
  inputs=[input_box],
62
+ outputs=[output_box]
 
 
63
  )
64
 
65
+ # Launch
66
  if __name__ == "__main__":
67
  demo.launch()