krish10 committed on
Commit
be16a0b
·
verified ·
1 Parent(s): 903043a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import gradio as gr
3
+ from transformers import pipeline, TextIteratorStreamer
4
+ import torch
5
+ import threading
6
+
# --- Model setup ---------------------------------------------------------
# Build a text-generation pipeline once at import time and keep direct
# handles to its tokenizer and model for manual streaming generation.
model_name = "krish10/Qwen3_14B_16bit_TA_screen_v1.0"
pipe = pipeline("text-generation", model=model_name, device=0)
tokenizer, model = pipe.tokenizer, pipe.model

# --- Fixed sampling configuration ---------------------------------------
MAX_TOKENS = 3000   # upper bound on newly generated tokens
TEMPERATURE = 0.1   # near-greedy sampling
TOP_P = 0.9         # nucleus-sampling cutoff
@spaces.GPU
def respond_stream(title, abstract):
    """Stream the model's evaluation of a paper title + abstract.

    Args:
        title: Paper title text (required, non-blank).
        abstract: Paper abstract text (required, non-blank).

    Yields:
        The accumulated response text, growing one decoded chunk at a
        time, so Gradio can render a live-updating textbox.
    """
    # BUG FIX: this function is a generator (it yields below), so a plain
    # ``return "<message>"`` would end iteration with the message silently
    # discarded — the UI would show nothing on empty input. Yield the
    # error text, then stop.
    if not title.strip() or not abstract.strip():
        yield "❌ Error: Title and Abstract are required."
        return

    prompt = f"Title: {title.strip()}\nAbstract: {abstract.strip()}"

    # Wrap the prompt in the model's chat template.
    messages = [{"role": "user", "content": prompt}]
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = tokenizer(prompt_text, return_tensors="pt").to("cuda")
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generation_kwargs = dict(
        input_ids=inputs["input_ids"],
        # Pass the attention mask explicitly: pad_token_id is set to
        # eos_token_id below, so generate() cannot infer padding on its own.
        attention_mask=inputs["attention_mask"],
        streamer=streamer,
        max_new_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Run generation in a background thread; the streamer feeds decoded
    # text chunks back to this generator as they are produced.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for token in streamer:
        partial_text += token
        yield partial_text

    # Make sure the generation thread has fully finished before returning.
    thread.join()
48
+
# --- Gradio interface ----------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Qwen Streaming Chat — Medical Abstract Evaluator")

    # Input fields: both are mandatory and validated in respond_stream.
    with gr.Column():
        title_input = gr.Textbox(label="Title", lines=2, placeholder="Required")
        abstract_input = gr.Textbox(
            label="Abstract", lines=10, placeholder="Required"
        )

    response_output = gr.Textbox(
        label="Model Response", lines=15, interactive=False
    )
    run_button = gr.Button("Generate")

    # respond_stream is a generator, so the output box updates live.
    run_button.click(
        fn=respond_stream,
        inputs=[title_input, abstract_input],
        outputs=[response_output],
    )

# Entry point when run as a script.
if __name__ == "__main__":
    demo.launch()