Spaces:
OrbitMC
/
Configuration error

OrbitMC committed on
Commit
ac69c7e
·
verified ·
1 Parent(s): 4b07097

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -50
app.py CHANGED
@@ -5,79 +5,72 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStream
5
  from threading import Thread
6
  from duckduckgo_search import DDGS
7
 
8
- # --- CONFIG ---
9
- MODEL_ID = "google/gemma-3-270m-it"
10
  HF_TOKEN = os.getenv('HF_TOKEN')
 
 
 
11
 
12
- # --- MODEL LOADING ---
13
- print("--- INITIALIZING GEMMA 3 (CPU MODE) ---")
14
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
15
 
16
- model = AutoModelForCausalLM.from_pretrained(
17
- MODEL_ID,
18
- device_map="cpu", # Requires 'accelerate' to be installed
19
- dtype=torch.float32, # Updated from torch_dtype to fix deprecation
20
- low_cpu_mem_usage=True,
21
- trust_remote_code=True,
22
- token=HF_TOKEN
23
- )
 
 
 
 
 
 
24
 
25
- # Limit CPU threads so the UI stays snappy
26
- torch.set_num_threads(max(1, (os.cpu_count() or 2) // 2))
27
 
28
  def web_search(query):
29
- """Fetch live data to ground the AI's response."""
30
- results = []
31
  try:
32
  with DDGS() as ddgs:
33
- for r in ddgs.text(query, max_results=3):
34
- results.append(f"Source: {r['href']}\nContent: {r['body']}")
35
- return "\n\n".join(results) if results else "No relevant web data found."
36
- except Exception as e:
37
- return f"Search error: {e}"
38
-
39
- def generate_response(message, history, search_enabled, max_new_tokens, temperature):
40
- context = ""
41
- if search_enabled:
42
- print(f"Searching for: {message}")
43
- context = web_search(message)
44
 
45
- # Gemma 3 Prompt Template
 
46
  prompt = f"Context: {context}\n\nUser: {message}\nAssistant:"
47
 
48
  inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
49
  streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
50
 
51
- generate_kwargs = dict(
52
- **inputs,
53
- streamer=streamer,
54
- max_new_tokens=int(max_new_tokens),
55
- do_sample=True,
56
- temperature=float(temperature),
57
- top_p=0.9,
58
  )
59
 
60
- thread = Thread(target=model.generate, kwargs=generate_kwargs)
61
  thread.start()
62
 
63
- partial_text = ""
64
- for new_text in streamer:
65
- partial_text += new_text
66
- yield partial_text
67
 
68
- # --- GRADIO UI ---
 
69
  demo = gr.ChatInterface(
70
- fn=generate_response,
71
  additional_inputs=[
72
- gr.Checkbox(label="🌐 Enable Web Search", value=True),
73
- gr.Slider(minimum=128, maximum=1024, value=512, step=128, label="Max Tokens"),
74
- gr.Slider(minimum=0.1, maximum=1.2, value=0.7, step=0.1, label="Temperature"),
75
  ],
76
- title="Gemma 3 Orbit Explorer",
77
- description="Optimized for CPU. This bot uses DuckDuckGo to stay up to date.",
78
- theme="glass",
79
- type="messages"
80
  )
81
 
82
  if __name__ == "__main__":
83
- demo.queue().launch(server_name="0.0.0.0")
 
5
  from threading import Thread
6
  from duckduckgo_search import DDGS
7
 
8
# --- STEP 1: LOAD ENV VARS ---
# Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.getenv('HF_TOKEN')
MODEL_ID = "google/gemma-3-270m-it"

print(f"--- [1/5] Initializing for {MODEL_ID} ---")

# --- STEP 2: LOAD TOKENIZER ---
print("--- [2/5] Loading Tokenizer... ---")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)

# --- STEP 3: LOAD MODEL (MEMORY OPTIMIZED) ---
print("--- [3/5] Materializing Model (This is where hangs usually happen)... ---")
try:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cpu",
        dtype=torch.float32,
        low_cpu_mem_usage=True,  # CRITICAL: Prevents RAM from spiking and hanging
        trust_remote_code=True,
        token=HF_TOKEN,
    )
except Exception as e:
    print(f"FATAL ERROR DURING LOADING: {e}")
    # Re-raise instead of continuing: if loading fails, `model` is never
    # bound, and the UI would start anyway only to fail on every request
    # with a NameError inside the generation thread.
    raise
else:
    # Only reported when from_pretrained returned without raising.
    print("--- [4/5] Model Loaded Successfully! ---")

# Optimize CPU threads
torch.set_num_threads(2)
35
 
36
  def web_search(query):
 
 
37
  try:
38
  with DDGS() as ddgs:
39
+ return "\n\n".join([f"Source: {r['href']}\n{r['body']}" for r in ddgs.text(query, max_results=3)])
40
+ except:
41
+ return "Search failed."
 
 
 
 
 
 
 
 
42
 
43
def generate(message, history, search_enabled, tokens, temp):
    """Stream a model reply to *message*, yielding the text accumulated so far.

    Args:
        message: The user's latest chat message.
        history: Prior conversation; accepted but not used when building
            the prompt.
        search_enabled: When truthy, the result of ``web_search(message)``
            is placed in the prompt's ``Context:`` section; otherwise the
            context is empty.
        tokens: Maximum number of new tokens (converted with ``int``).
        temp: Sampling temperature (converted with ``float``).

    Yields:
        The response string so far, growing by one streamed chunk per step.
    """
    if search_enabled:
        grounding = web_search(message)
    else:
        grounding = ""
    prompt_text = f"Context: {grounding}\n\nUser: {message}\nAssistant:"

    encoded = tokenizer(prompt_text, return_tensors="pt").to("cpu")
    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=20.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    sampling_args = {
        **encoded,
        "streamer": streamer,
        "max_new_tokens": int(tokens),
        "do_sample": True,
        "temperature": float(temp),
        "top_p": 0.9,
    }

    # Generation runs on a separate thread so this generator can consume
    # the streamer while tokens are still being produced.
    worker = Thread(target=model.generate, kwargs=sampling_args)
    worker.start()

    so_far = ""
    for chunk in streamer:
        so_far = so_far + chunk
        yield so_far
62
 
63
# --- STEP 4: UI SETUP ---
print("--- [5/5] Launching Gradio UI... ---")

# Chat UI backed by `generate`. The three extra controls line up, in order,
# with generate's `search_enabled`, `tokens` and `temp` parameters.
demo = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Checkbox(value=True, label="Search Web"),
        gr.Slider(minimum=128, maximum=1024, value=512, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=1.2, value=0.7, label="Temp"),
    ],
    type="messages",
)

# Start the server only when run as a script; bind to all interfaces.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")