shuarya2011 committed on
Commit
3518954
·
verified ·
1 Parent(s): 14caab0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -10
app.py CHANGED
@@ -5,6 +5,7 @@ from threading import Thread
5
 
6
  model_id = "DavidAU/Qwen3-0.6B-heretic-abliterated-uncensored"
7
 
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_id)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  model_id,
@@ -14,17 +15,24 @@ model = AutoModelForCausalLM.from_pretrained(
14
  )
15
 
16
  def stream_chat(message, history):
17
- # Strict prompt to stop reasoning/thinking
18
  system_message = "You are a direct assistant. Answer immediately. DO NOT explain your thinking. DO NOT show internal reasoning."
19
 
20
- # Updated History Logic for Gradio 6.0
21
  conversation = f"System: {system_message}\n"
 
 
22
  for msg in history:
23
- role = "User" if msg['role'] == 'user' else "Assistant"
24
- content = msg['content']
 
 
 
 
 
 
 
25
  conversation += f"{role}: {content}\n"
26
 
27
- # Add current message
28
  conversation += f"User: {message}\nAssistant: "
29
 
30
  inputs = tokenizer([conversation], return_tensors="pt").to("cpu")
@@ -35,7 +43,7 @@ def stream_chat(message, history):
35
  streamer=streamer,
36
  max_new_tokens=512,
37
  do_sample=True,
38
- temperature=0.3, # Low temp = No wandering into 'thinking'
39
  top_p=0.9,
40
  repetition_penalty=1.2,
41
  pad_token_id=tokenizer.eos_token_id
@@ -46,19 +54,20 @@ def stream_chat(message, history):
46
 
47
  partial_text = ""
48
  for new_text in streamer:
49
- # Stop if the model starts hallucinating typical reasoning phrases
50
  if any(stop in new_text for stop in ["User:", "Okay,", "I think", "First"]):
51
  break
52
  partial_text += new_text
53
  yield partial_text
54
 
 
55
  with gr.Blocks() as demo:
56
  gr.ChatInterface(
57
  fn=stream_chat,
58
- type="messages", # Explicitly tell Gradio to use the new message format
59
- title="QWEN3-0.6B DIRECT",
60
- description="Fixed history unpacking error and suppressed reasoning."
61
  )
62
 
63
  if __name__ == "__main__":
 
64
  demo.launch(server_name="0.0.0.0", server_port=7860, theme="soft")
 
5
 
6
  model_id = "DavidAU/Qwen3-0.6B-heretic-abliterated-uncensored"
7
 
8
+ # Load model and tokenizer
9
  tokenizer = AutoTokenizer.from_pretrained(model_id)
10
  model = AutoModelForCausalLM.from_pretrained(
11
  model_id,
 
15
  )
16
 
17
  def stream_chat(message, history):
18
+ # Strict system prompt to kill the internal monologue
19
  system_message = "You are a direct assistant. Answer immediately. DO NOT explain your thinking. DO NOT show internal reasoning."
20
 
 
21
  conversation = f"System: {system_message}\n"
22
+
23
+ # Robust history handling: Checks if history is list of lists or list of dicts
24
  for msg in history:
25
+ if isinstance(msg, dict):
26
+ role = "User" if msg.get('role') == 'user' else "Assistant"
27
+ content = msg.get('content', '')
28
+ else:
29
+ # Fallback for older Gradio versions (list of lists)
30
+ role, content = "User", msg[0]
31
+ conversation += f"{role}: {content}\n"
32
+ role, content = "Assistant", msg[1]
33
+
34
  conversation += f"{role}: {content}\n"
35
 
 
36
  conversation += f"User: {message}\nAssistant: "
37
 
38
  inputs = tokenizer([conversation], return_tensors="pt").to("cpu")
 
43
  streamer=streamer,
44
  max_new_tokens=512,
45
  do_sample=True,
46
+ temperature=0.3, # Low temp ensures the model doesn't drift into 'thinking'
47
  top_p=0.9,
48
  repetition_penalty=1.2,
49
  pad_token_id=tokenizer.eos_token_id
 
54
 
55
  partial_text = ""
56
  for new_text in streamer:
57
+ # Hard stop if model tries to start its 'Okay, let me think' routine
58
  if any(stop in new_text for stop in ["User:", "Okay,", "I think", "First"]):
59
  break
60
  partial_text += new_text
61
  yield partial_text
62
 
63
+ # Removed 'type="messages"' to fix the TypeError
64
  with gr.Blocks() as demo:
65
  gr.ChatInterface(
66
  fn=stream_chat,
67
+ title="QWEN3-0.6B DIRECT MAX",
68
+ description="Reasoning suppressed. Fast, direct, uncensored responses."
 
69
  )
70
 
71
  if __name__ == "__main__":
72
+ # Passing theme here as per Gradio 6.0 logs
73
  demo.launch(server_name="0.0.0.0", server_port=7860, theme="soft")