krish10 committed on
Commit
80fe42a
·
verified ·
1 Parent(s): 4982a76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -39
app.py CHANGED
@@ -2,37 +2,22 @@ import spaces
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
- # Load model and tokenizer
6
  model_name = "krish10/Qwen3_0.6B_16bit_TA_screen"
 
 
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
9
 
10
- # Chat function
11
  @spaces.GPU
12
- def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
13
- # Format message history
14
- messages = []
15
- if system_message:
16
- messages.append({"role": "system", "content": system_message})
17
- for user_msg, bot_msg in history:
18
- messages.append({"role": "user", "content": user_msg})
19
- messages.append({"role": "assistant", "content": bot_msg})
20
- messages.append({"role": "user", "content": message})
21
-
22
- # Format prompt with Qwen's template
23
- prompt = tokenizer.apply_chat_template(
24
- messages,
25
- tokenize=False,
26
- add_generation_prompt=True
27
- )
28
-
29
- # Optional: print prompt for debugging
30
- print("PROMPT:\n", prompt)
31
 
32
- # Tokenize and send to GPU
33
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
34
 
35
- # Generate response
36
  outputs = model.generate(
37
  input_ids=inputs["input_ids"],
38
  max_new_tokens=max_tokens,
@@ -44,31 +29,27 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
44
 
45
  # Decode and strip prompt
46
  decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
47
- reply = decoded[len(prompt):]
48
- return reply
49
 
50
  # Build Gradio UI
51
  with gr.Blocks() as demo:
52
- gr.Markdown("## 🧠 Qwen Chat Interface (with proper chat template)")
53
 
54
- chatbot = gr.Chatbot()
55
- msg = gr.Textbox(label="Enter your message")
56
- system_msg = gr.Textbox(value="", label="System message (optional)")
57
- max_tokens = gr.Slider(1, 16384, value=4000, step=1, label="Max new tokens")
58
  temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
59
  top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
60
 
61
- state = gr.State([]) # for message history
62
 
63
- def user_submit(user_message, history, sys_msg, max_tokens, temp, top_p):
64
- response = respond(user_message, history, sys_msg, max_tokens, temp, top_p)
65
- history.append((user_message, response))
66
- return history, history
67
 
68
- msg.submit(
69
- user_submit,
70
- inputs=[msg, state, system_msg, max_tokens, temperature, top_p],
71
- outputs=[chatbot, state]
72
  )
73
 
74
  # Launch app
 
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
+ # Model path
6
  model_name = "krish10/Qwen3_0.6B_16bit_TA_screen"
7
+
8
+ # Load model and tokenizer
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
10
  model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
11
 
12
+ # Raw text-generation function (no chat formatting)
13
  @spaces.GPU
14
+ def respond(message, _, __, max_tokens, temperature, top_p):
15
+ prompt = message # Use message as-is
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # Tokenize
18
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
19
 
20
+ # Generate
21
  outputs = model.generate(
22
  input_ids=inputs["input_ids"],
23
  max_new_tokens=max_tokens,
 
29
 
30
  # Decode and strip prompt
31
  decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
32
+ response = decoded[len(prompt):]
33
+ return response
34
 
35
  # Build Gradio UI
36
  with gr.Blocks() as demo:
37
+ gr.Markdown("## 🧠 Structured Evaluation Chat (No Template, Matches Fine-Tuning)")
38
 
39
+ msg = gr.Textbox(lines=15, label="Input your instruction + abstract (exact format as in Colab)")
40
+ system_msg = gr.Textbox(visible=False) # ignored
41
+ max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Max new tokens")
 
42
  temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
43
  top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
44
 
45
+ output = gr.Textbox(lines=15, label="Model response")
46
 
47
+ btn = gr.Button("Generate")
 
 
 
48
 
49
+ btn.click(
50
+ fn=respond,
51
+ inputs=[msg, system_msg, None, max_tokens, temperature, top_p],
52
+ outputs=[output]
53
  )
54
 
55
  # Launch app