AxionLab-official committed on
Commit
02f0452
·
verified ·
1 Parent(s): a14e4fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -21
app.py CHANGED
@@ -1,20 +1,19 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import os
5
 
6
  # =========================
7
  # CONFIG
8
  # =========================
9
  MODEL_ID = "AxionLab-Co/DogeAI-v2.0-4B-Reasoning"
10
- MAX_NEW_TOKENS = 1024
11
 
12
  tokenizer = None
13
  model = None
14
 
15
 
16
  # =========================
17
- # LOAD MODEL (LAZY)
18
  # =========================
19
  def load_model():
20
  global tokenizer, model
@@ -28,7 +27,7 @@ def load_model():
28
  model = AutoModelForCausalLM.from_pretrained(
29
  MODEL_ID,
30
  device_map="cpu",
31
- dtype=torch.float32,
32
  low_cpu_mem_usage=True
33
  )
34
 
@@ -38,17 +37,19 @@ def load_model():
38
 
39
 
40
  # =========================
41
- # PROMPT (REASONING-FIRST)
42
  # =========================
43
- def build_prompt(user_input):
44
- return f"""
45
- You are DogeAI-v2.0-4B-Reasoning.
46
 
47
- You MUST think step by step.
48
- Break the problem into parts.
49
- Reason explicitly before answering.
50
- Then provide a clear final answer.
51
- If the user is speaking Brazilian Portuguese, use brazilian slangs, be the Doge guy, but don't stop thinking seriously. 🐕🇧🇷
 
 
 
52
 
53
  User:
54
  {user_input}
@@ -58,11 +59,14 @@ Assistant:
58
 
59
 
60
  # =========================
61
- # CHAT FUNCTION
62
  # =========================
63
  def chat(user_input):
64
  tokenizer, model = load_model()
65
 
 
 
 
66
  prompt = build_prompt(user_input)
67
 
68
  inputs = tokenizer(
@@ -84,7 +88,10 @@ def chat(user_input):
84
  skip_special_tokens=True
85
  )
86
 
87
- return text.replace(prompt, "").strip()
 
 
 
88
 
89
 
90
  # =========================
@@ -93,22 +100,22 @@ def chat(user_input):
93
  with gr.Blocks(title="DogeAI-v2.0-4B-Reasoning") as demo:
94
  gr.Markdown(
95
  "# 🐕 DogeAI-v2.0-4B-Reasoning\n"
96
- "**4-bit reasoning model running on HF Space (CPU)**\n\n"
97
- "Focused on explicit thinking, not raw speed."
98
  )
99
 
100
  input_box = gr.Textbox(
101
- label="Your question",
102
- placeholder="Ask something that requires reasoning...",
103
  lines=4
104
  )
105
 
106
  output_box = gr.Textbox(
107
- label="DogeAI Response",
108
  lines=14
109
  )
110
 
111
- run_btn = gr.Button("Think 🧠")
112
 
113
  run_btn.click(
114
  fn=chat,
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
 
5
  # =========================
6
  # CONFIG
7
  # =========================
8
  MODEL_ID = "AxionLab-Co/DogeAI-v2.0-4B-Reasoning"
9
+ MAX_NEW_TOKENS = 512 # menor = menos timeout em CPU
10
 
11
  tokenizer = None
12
  model = None
13
 
14
 
15
  # =========================
16
+ # LOAD MODEL (LAZY + SAFE)
17
  # =========================
18
  def load_model():
19
  global tokenizer, model
 
27
  model = AutoModelForCausalLM.from_pretrained(
28
  MODEL_ID,
29
  device_map="cpu",
30
+ torch_dtype=torch.float32,
31
  low_cpu_mem_usage=True
32
  )
33
 
 
37
 
38
 
39
  # =========================
40
+ # PROMPT (CPU-FRIENDLY)
41
  # =========================
42
+ def build_prompt(user_input: str) -> str:
43
+ return f"""You are DogeAI-v2.0-4B-Reasoning.
 
44
 
45
+ Think step by step internally.
46
+ Do not reveal your full chain-of-thought.
47
+ Provide a clear final answer with a short explanation.
48
+
49
+ If the user speaks Brazilian Portuguese:
50
+ - use Brazilian slang lightly
51
+ - keep the Doge vibe 🐕🇧🇷
52
+ - stay serious and logical
53
 
54
  User:
55
  {user_input}
 
59
 
60
 
61
  # =========================
62
+ # CHAT FUNCTION (SSE-SAFE)
63
  # =========================
64
  def chat(user_input):
65
  tokenizer, model = load_model()
66
 
67
+ # mantém o SSE vivo imediatamente
68
+ yield "🤔 DogeAI está pensando... segura aí..."
69
+
70
  prompt = build_prompt(user_input)
71
 
72
  inputs = tokenizer(
 
88
  skip_special_tokens=True
89
  )
90
 
91
+ # remove o prompt da resposta final
92
+ response = text.split("Assistant:", 1)[-1].strip()
93
+
94
+ yield response
95
 
96
 
97
  # =========================
 
100
  with gr.Blocks(title="DogeAI-v2.0-4B-Reasoning") as demo:
101
  gr.Markdown(
102
  "# 🐕 DogeAI-v2.0-4B-Reasoning\n"
103
+ "**4B reasoning model rodando em CPU no HF Space**\n\n"
104
+ "Pensamento explícito interno, resposta clara externa."
105
  )
106
 
107
  input_box = gr.Textbox(
108
+ label="Pergunta",
109
+ placeholder="Pergunta que exige raciocínio de verdade...",
110
  lines=4
111
  )
112
 
113
  output_box = gr.Textbox(
114
+ label="Resposta do DogeAI",
115
  lines=14
116
  )
117
 
118
+ run_btn = gr.Button("Pensar 🧠🐕")
119
 
120
  run_btn.click(
121
  fn=chat,