Spaces:

Redhanuman
/

shadow-0.7b

Sleeping

App Files Community

Redhanuman committed on Dec 4, 2025

Commit

23a44d7

verified ·

1 Parent(s): 7f27c04

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -24

app.py CHANGED Viewed

@@ -1,36 +1,36 @@
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
-# --- CONFIGURATION ---
-MODEL_ID = "Redhanuman/Shadow-0.7B"  # Your Hugging Face repo
-# --- LOAD MODEL ---
 print("🌑 Loading Shadow Brain...")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     device_map="auto"
 )
 model.eval()
-# --- INFERENCE FUNCTION ---
 def predict(message, history):
     system_prompt = (
         "You are Shadow 0.7B, a reasoning AI created by Aman Kumar Pandey. "
         "Use <think> tags to plan logic before answering."
     )
-    # Prepare conversation history
     messages = [{"role": "system", "content": system_prompt}]
     for user_msg, bot_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
-    # Tokenize input using chat template
     input_ids = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
@@ -38,7 +38,6 @@ def predict(message, history):
         return_tensors="pt"
     ).to(model.device)
-    # Streamer for token-by-token output
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = dict(
         input_ids=input_ids,
@@ -49,7 +48,6 @@ def predict(message, history):
         repetition_penalty=1.1,
     )
-    # Generate in separate thread
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
@@ -58,20 +56,37 @@ def predict(message, history):
         partial_message += new_token
         yield partial_message
-# --- GRADIO APP ---
 custom_css = """
-body { background-color: #0b0f19; color: #e0e0e0; }
-gradio-app { background-color: #0b0f19; }
-.message.user { border-color: #3b82f6 !important; background: #1e293b !important; }
-.message.bot { border-color: #8b5cf6 !important; background: #0f172a !important; }
-h1 { color: #f8fafc; font-family: 'Inter', sans-serif; font-weight: 800; }
-footer { display: none !important; }
 """
-# --- GRADIO APP ---
-with gr.Blocks(css=custom_css) as demo:  # Removed theme=gr.themes.Base()
     gr.Markdown("# 🌑 Shadow 0.7B")
-    gr.Markdown("Created by **Aman Kumar Pandey** | Focused on Logic & Reasoning")
     chat = gr.ChatInterface(
         fn=predict,
@@ -79,11 +94,10 @@ with gr.Blocks(css=custom_css) as demo:  # Removed theme=gr.themes.Base()
         undo_btn=None,
         clear_btn="🗑️ Clear Memory",
         examples=[
-            "Who created you?",
             "Write a Python function to check for palindromes.",
             "If I have 3 apples and eat one, how many do I have?"
         ],
     )
-demo.queue().launch()

 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from peft import PeftModel
 from threading import Thread
+BASE_MODEL = "Qwen/Qwen3-0.6B"
+ADAPTER_ID = "Redhanuman/Shadow-0.7B"
 print("🌑 Loading Shadow Brain...")
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+base_model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     device_map="auto"
 )
+model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
 model.eval()
 def predict(message, history):
     system_prompt = (
         "You are Shadow 0.7B, a reasoning AI created by Aman Kumar Pandey. "
         "Use <think> tags to plan logic before answering."
     )
     messages = [{"role": "system", "content": system_prompt}]
     for user_msg, bot_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
         return_tensors="pt"
     ).to(model.device)
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = dict(
         input_ids=input_ids,
         repetition_penalty=1.1,
     )
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
         partial_message += new_token
         yield partial_message
+# Custom CSS for dark theme
 custom_css = """
+body {
+    background-color: #0b0f19 !important;
+    color: #e0e0e0 !important;
+}
+.gradio-container {
+    background-color: #0b0f19 !important;
+}
+.message.user {
+    border-color: #3b82f6 !important;
+    background: #1e293b !important;
+}
+.message.bot {
+    border-color: #8b5cf6 !important;
+    background: #0f172a !important;
+}
+h1 {
+    color: #f8fafc !important;
+    font-family: 'Inter', sans-serif !important;
+    font-weight: 800 !important;
+}
+footer {
+    display: none !important;
+}
 """
+# Create the Gradio interface
+with gr.Blocks(css=custom_css) as demo:
     gr.Markdown("# 🌑 Shadow 0.7B")
+    gr.Markdown("Created by **Aman Kumar Pandey** | Focused on Code Logic & Reasoning")
     chat = gr.ChatInterface(
         fn=predict,
         undo_btn=None,
         clear_btn="🗑️ Clear Memory",
         examples=[
             "Write a Python function to check for palindromes.",
             "If I have 3 apples and eat one, how many do I have?"
         ],
     )
+if __name__ == "__main__":
+    demo.queue().launch()