AxionLab-official committed on
Commit
02f0452
·
verified ·
1 Parent(s): a14e4fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -21
app.py CHANGED
@@ -1,20 +1,19 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import os
5
 
6
  # =========================
7
  # CONFIG
8
  # =========================
9
  MODEL_ID = "AxionLab-Co/DogeAI-v2.0-4B-Reasoning"
10
- MAX_NEW_TOKENS = 1024
11
 
12
  tokenizer = None
13
  model = None
14
 
15
 
16
  # =========================
17
- # LOAD MODEL (LAZY)
18
  # =========================
19
  def load_model():
20
  global tokenizer, model
@@ -28,7 +27,7 @@ def load_model():
28
  model = AutoModelForCausalLM.from_pretrained(
29
  MODEL_ID,
30
  device_map="cpu",
31
- dtype=torch.float32,
32
  low_cpu_mem_usage=True
33
  )
34
 
@@ -38,17 +37,19 @@ def load_model():
38
 
39
 
40
  # =========================
41
- # PROMPT (REASONING-FIRST)
42
  # =========================
43
- def build_prompt(user_input):
44
- return f"""
45
- You are DogeAI-v2.0-4B-Reasoning.
46
 
47
- You MUST think step by step.
48
- Break the problem into parts.
49
- Reason explicitly before answering.
50
- Then provide a clear final answer.
51
- If the user is speaking Brazilian Portuguese, use brazilian slangs, be the Doge guy, but don't stop thinking seriously. 🐕🇧🇷
 
 
 
52
 
53
  User:
54
  {user_input}
@@ -58,11 +59,14 @@ Assistant:
58
 
59
 
60
  # =========================
61
- # CHAT FUNCTION
62
  # =========================
63
  def chat(user_input):
64
  tokenizer, model = load_model()
65
 
 
 
 
66
  prompt = build_prompt(user_input)
67
 
68
  inputs = tokenizer(
@@ -84,7 +88,10 @@ def chat(user_input):
84
  skip_special_tokens=True
85
  )
86
 
87
- return text.replace(prompt, "").strip()
 
 
 
88
 
89
 
90
  # =========================
@@ -93,22 +100,22 @@ def chat(user_input):
93
  with gr.Blocks(title="DogeAI-v2.0-4B-Reasoning") as demo:
94
  gr.Markdown(
95
  "# 🐕 DogeAI-v2.0-4B-Reasoning\n"
96
- "**4-bit reasoning model running on HF Space (CPU)**\n\n"
97
- "Focused on explicit thinking, not raw speed."
98
  )
99
 
100
  input_box = gr.Textbox(
101
- label="Your question",
102
- placeholder="Ask something that requires reasoning...",
103
  lines=4
104
  )
105
 
106
  output_box = gr.Textbox(
107
- label="DogeAI Response",
108
  lines=14
109
  )
110
 
111
- run_btn = gr.Button("Think 🧠")
112
 
113
  run_btn.click(
114
  fn=chat,
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
 
5
  # =========================
6
  # CONFIG
7
  # =========================
8
  MODEL_ID = "AxionLab-Co/DogeAI-v2.0-4B-Reasoning"
9
+ MAX_NEW_TOKENS = 512 # menor = menos timeout em CPU
10
 
11
  tokenizer = None
12
  model = None
13
 
14
 
15
  # =========================
16
+ # LOAD MODEL (LAZY + SAFE)
17
  # =========================
18
  def load_model():
19
  global tokenizer, model
 
27
  model = AutoModelForCausalLM.from_pretrained(
28
  MODEL_ID,
29
  device_map="cpu",
30
+ torch_dtype=torch.float32,
31
  low_cpu_mem_usage=True
32
  )
33
 
 
37
 
38
 
39
  # =========================
40
+ # PROMPT (CPU-FRIENDLY)
41
  # =========================
42
+ def build_prompt(user_input: str) -> str:
43
+ return f"""You are DogeAI-v2.0-4B-Reasoning.
 
44
 
45
+ Think step by step internally.
46
+ Do not reveal your full chain-of-thought.
47
+ Provide a clear final answer with a short explanation.
48
+
49
+ If the user speaks Brazilian Portuguese:
50
+ - use Brazilian slang lightly
51
+ - keep the Doge vibe 🐕🇧🇷
52
+ - stay serious and logical
53
 
54
  User:
55
  {user_input}
 
59
 
60
 
61
  # =========================
62
+ # CHAT FUNCTION (SSE-SAFE)
63
  # =========================
64
  def chat(user_input):
65
  tokenizer, model = load_model()
66
 
67
+ # mantém o SSE vivo imediatamente
68
+ yield "🤔 DogeAI está pensando... segura aí..."
69
+
70
  prompt = build_prompt(user_input)
71
 
72
  inputs = tokenizer(
 
88
  skip_special_tokens=True
89
  )
90
 
91
+ # remove o prompt da resposta final
92
+ response = text.split("Assistant:", 1)[-1].strip()
93
+
94
+ yield response
95
 
96
 
97
  # =========================
 
100
  with gr.Blocks(title="DogeAI-v2.0-4B-Reasoning") as demo:
101
  gr.Markdown(
102
  "# 🐕 DogeAI-v2.0-4B-Reasoning\n"
103
+ "**4B reasoning model rodando em CPU no HF Space**\n\n"
104
+ "Pensamento explícito interno, resposta clara externa."
105
  )
106
 
107
  input_box = gr.Textbox(
108
+ label="Pergunta",
109
+ placeholder="Pergunta que exige raciocínio de verdade...",
110
  lines=4
111
  )
112
 
113
  output_box = gr.Textbox(
114
+ label="Resposta do DogeAI",
115
  lines=14
116
  )
117
 
118
+ run_btn = gr.Button("Pensar 🧠🐕")
119
 
120
  run_btn.click(
121
  fn=chat,