Redhanuman committed on
Commit
23a44d7
·
verified ·
1 Parent(s): 7f27c04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -24
app.py CHANGED
@@ -1,36 +1,36 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
4
  from threading import Thread
5
 
6
- # --- CONFIGURATION ---
7
- MODEL_ID = "Redhanuman/Shadow-0.7B" # Your Hugging Face repo
8
 
9
- # --- LOAD MODEL ---
10
  print("🌑 Loading Shadow Brain...")
11
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
12
- model = AutoModelForCausalLM.from_pretrained(
13
- MODEL_ID,
 
14
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
15
  device_map="auto"
16
  )
 
 
17
  model.eval()
18
 
19
- # --- INFERENCE FUNCTION ---
20
  def predict(message, history):
21
  system_prompt = (
22
  "You are Shadow 0.7B, a reasoning AI created by Aman Kumar Pandey. "
23
  "Use <think> tags to plan logic before answering."
24
  )
25
 
26
- # Prepare conversation history
27
  messages = [{"role": "system", "content": system_prompt}]
28
  for user_msg, bot_msg in history:
29
  messages.append({"role": "user", "content": user_msg})
30
  messages.append({"role": "assistant", "content": bot_msg})
31
  messages.append({"role": "user", "content": message})
32
 
33
- # Tokenize input using chat template
34
  input_ids = tokenizer.apply_chat_template(
35
  messages,
36
  tokenize=True,
@@ -38,7 +38,6 @@ def predict(message, history):
38
  return_tensors="pt"
39
  ).to(model.device)
40
 
41
- # Streamer for token-by-token output
42
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
43
  generation_kwargs = dict(
44
  input_ids=input_ids,
@@ -49,7 +48,6 @@ def predict(message, history):
49
  repetition_penalty=1.1,
50
  )
51
 
52
- # Generate in separate thread
53
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
54
  thread.start()
55
 
@@ -58,20 +56,37 @@ def predict(message, history):
58
  partial_message += new_token
59
  yield partial_message
60
 
61
- # --- GRADIO APP ---
62
  custom_css = """
63
- body { background-color: #0b0f19; color: #e0e0e0; }
64
- gradio-app { background-color: #0b0f19; }
65
- .message.user { border-color: #3b82f6 !important; background: #1e293b !important; }
66
- .message.bot { border-color: #8b5cf6 !important; background: #0f172a !important; }
67
- h1 { color: #f8fafc; font-family: 'Inter', sans-serif; font-weight: 800; }
68
- footer { display: none !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  """
70
 
71
- # --- GRADIO APP ---
72
- with gr.Blocks(css=custom_css) as demo: # Removed theme=gr.themes.Base()
73
  gr.Markdown("# 🌑 Shadow 0.7B")
74
- gr.Markdown("Created by **Aman Kumar Pandey** | Focused on Logic & Reasoning")
75
 
76
  chat = gr.ChatInterface(
77
  fn=predict,
@@ -79,11 +94,10 @@ with gr.Blocks(css=custom_css) as demo: # Removed theme=gr.themes.Base()
79
  undo_btn=None,
80
  clear_btn="🗑️ Clear Memory",
81
  examples=[
82
- "Who created you?",
83
  "Write a Python function to check for palindromes.",
84
  "If I have 3 apples and eat one, how many do I have?"
85
  ],
86
  )
87
 
88
- demo.queue().launch()
89
-
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
+ from peft import PeftModel
5
  from threading import Thread
6
 
7
+ BASE_MODEL = "Qwen/Qwen3-0.6B"
8
+ ADAPTER_ID = "Redhanuman/Shadow-0.7B"
9
 
 
10
  print("🌑 Loading Shadow Brain...")
11
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
12
+
13
+ base_model = AutoModelForCausalLM.from_pretrained(
14
+ BASE_MODEL,
15
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
16
  device_map="auto"
17
  )
18
+
19
+ model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
20
  model.eval()
21
 
 
22
  def predict(message, history):
23
  system_prompt = (
24
  "You are Shadow 0.7B, a reasoning AI created by Aman Kumar Pandey. "
25
  "Use <think> tags to plan logic before answering."
26
  )
27
 
 
28
  messages = [{"role": "system", "content": system_prompt}]
29
  for user_msg, bot_msg in history:
30
  messages.append({"role": "user", "content": user_msg})
31
  messages.append({"role": "assistant", "content": bot_msg})
32
  messages.append({"role": "user", "content": message})
33
 
 
34
  input_ids = tokenizer.apply_chat_template(
35
  messages,
36
  tokenize=True,
 
38
  return_tensors="pt"
39
  ).to(model.device)
40
 
 
41
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
42
  generation_kwargs = dict(
43
  input_ids=input_ids,
 
48
  repetition_penalty=1.1,
49
  )
50
 
 
51
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
52
  thread.start()
53
 
 
56
  partial_message += new_token
57
  yield partial_message
58
 
59
+ # Custom CSS for dark theme
60
  custom_css = """
61
+ body {
62
+ background-color: #0b0f19 !important;
63
+ color: #e0e0e0 !important;
64
+ }
65
+ .gradio-container {
66
+ background-color: #0b0f19 !important;
67
+ }
68
+ .message.user {
69
+ border-color: #3b82f6 !important;
70
+ background: #1e293b !important;
71
+ }
72
+ .message.bot {
73
+ border-color: #8b5cf6 !important;
74
+ background: #0f172a !important;
75
+ }
76
+ h1 {
77
+ color: #f8fafc !important;
78
+ font-family: 'Inter', sans-serif !important;
79
+ font-weight: 800 !important;
80
+ }
81
+ footer {
82
+ display: none !important;
83
+ }
84
  """
85
 
86
+ # Create the Gradio interface
87
+ with gr.Blocks(css=custom_css) as demo:
88
  gr.Markdown("# 🌑 Shadow 0.7B")
89
+ gr.Markdown("Created by **Aman Kumar Pandey** | Focused on Code Logic & Reasoning")
90
 
91
  chat = gr.ChatInterface(
92
  fn=predict,
 
94
  undo_btn=None,
95
  clear_btn="🗑️ Clear Memory",
96
  examples=[
 
97
  "Write a Python function to check for palindromes.",
98
  "If I have 3 apples and eat one, how many do I have?"
99
  ],
100
  )
101
 
102
+ if __name__ == "__main__":
103
+ demo.queue().launch()