Redhanuman committed on
Commit
155d195
·
verified ·
1 Parent(s): 7d8255a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from peft import PeftModel
from threading import Thread

# Base checkpoint and the LoRA adapter fine-tuned on top of it.
BASE_MODEL = "Qwen/Qwen3-0.6B"
ADAPTER_ID = "Redhanuman/Shadow-0.7B"

print("🌑 Loading Shadow Brain...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# fp16 on GPU, fp32 on CPU; device_map="auto" lets accelerate choose placement.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)

# Attach the PEFT/LoRA adapter on top of the base weights, then switch the
# combined model to inference mode (disables dropout etc.).
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
model.eval()
22
def predict(message, history):
    """Stream a chat completion from the adapted model.

    Args:
        message: Latest user message (str).
        history: Prior turns supplied by gr.ChatInterface — either a list of
            (user_msg, bot_msg) tuples (classic Gradio format) or a list of
            {"role": ..., "content": ...} dicts (messages format used by
            newer Gradio versions). Both are accepted.

    Yields:
        str: The progressively accumulated assistant reply, re-yielded on
        every new streamed token so the UI updates incrementally.
    """
    system_prompt = (
        "You are Shadow 0.7B, a reasoning AI created by Aman Kumar Pandey. "
        "Use <think> tags to plan logic before answering."
    )

    messages = [{"role": "system", "content": system_prompt}]
    for turn in history:
        if isinstance(turn, dict):
            # Messages-format history: pass the role/content pair through.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuple-format history: one (user, assistant) pair per exchange.
            user_msg, bot_msg = turn
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # skip_prompt=True so the echoed prompt is not streamed back to the UI.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=1024,
        # do_sample=True is required for temperature/top_p to take effect;
        # without it transformers uses greedy decoding and silently ignores
        # the sampling parameters below.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
    )

    # Run generation on a worker thread so this generator can consume the
    # streamer concurrently and yield partial output to Gradio.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        yield partial_message
58
+
59
import inspect

# Dark theme overrides; `footer { display: none }` hides the Gradio badge.
custom_css = """
body { background-color: #0b0f19; color: #e0e0e0; }
gradio-app { background-color: #0b0f19; }
.message.user { border-color: #3b82f6 !important; background: #1e293b !important; }
.message.bot { border-color: #8b5cf6 !important; background: #0f172a !important; }
h1 { color: #f8fafc; font-family: 'Inter', sans-serif; font-weight: 800; }
footer { display: none !important; }
"""

with gr.Blocks(theme=gr.themes.Base(), css=custom_css) as demo:
    gr.Markdown("# 🌑 Shadow 0.7B")
    gr.Markdown("Created by **Aman Kumar Pandey** | Focused on Code Logic & Reasoning")

    chat_kwargs = {
        "fn": predict,
        "examples": [
            "Write a Python function to check for palindromes.",
            "If I have 3 apples and eat one, how many do I have?"
        ],
    }
    # retry_btn/undo_btn/clear_btn were removed from gr.ChatInterface in
    # Gradio 5.x; passing them there raises TypeError at startup. Only pass
    # them when the installed Gradio version still accepts them, so the app
    # behaves identically on old runtimes and still launches on new ones.
    _accepted = inspect.signature(gr.ChatInterface.__init__).parameters
    for _name, _value in {
        "retry_btn": None,
        "undo_btn": None,
        "clear_btn": "🗑️ Clear Memory",
    }.items():
        if _name in _accepted:
            chat_kwargs[_name] = _value

    chat = gr.ChatInterface(**chat_kwargs)

# queue() enables the request queue required for streaming generators.
demo.queue().launch()