
OrbitMC committed · commit f71bbec · verified · 1 parent: ac69c7e

Update app.py

Files changed (1)
  1. app.py +43 -33
app.py CHANGED
@@ -5,51 +5,56 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from threading import Thread
 from duckduckgo_search import DDGS
 
-# --- STEP 1: LOAD ENV VARS ---
+# --- CONFIG ---
 HF_TOKEN = os.getenv('HF_TOKEN')
 MODEL_ID = "google/gemma-3-270m-it"
 
-print(f"--- [1/5] Initializing for {MODEL_ID} ---")
-
-# --- STEP 2: LOAD TOKENIZER ---
-print("--- [2/5] Loading Tokenizer... ---")
+print("--- [1/5] Initializing ---")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
 
-# --- STEP 3: LOAD MODEL (MEMORY OPTIMIZED) ---
-print("--- [3/5] Materializing Model (This is where hangs usually happen)... ---")
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        device_map="cpu",
-        dtype=torch.float32,
-        low_cpu_mem_usage=True,  # CRITICAL: Prevents RAM from spiking and hanging
-        trust_remote_code=True,
-        token=HF_TOKEN
-    )
-    print("--- [4/5] Model Loaded Successfully! ---")
-except Exception as e:
-    print(f"FATAL ERROR DURING LOADING: {e}")
+print("--- [2/5] Loading Model ---")
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map="cpu",
+    dtype=torch.float32,
+    low_cpu_mem_usage=True,
+    trust_remote_code=True,
+    token=HF_TOKEN
+)
+print("--- [3/5] Model Loaded! ---")
 
-# Optimize CPU threads
+# Optimize for CPU
 torch.set_num_threads(2)
 
 def web_search(query):
     try:
         with DDGS() as ddgs:
-            return "\n\n".join([f"Source: {r['href']}\n{r['body']}" for r in ddgs.text(query, max_results=3)])
+            results = [f"Source: {r['href']}\n{r['body']}" for r in ddgs.text(query, max_results=3)]
+            return "\n\n".join(results)
     except:
-        return "Search failed."
+        return "Search currently unavailable."
 
 def generate(message, history, search_enabled, tokens, temp):
-    context = web_search(message) if search_enabled else ""
-    prompt = f"Context: {context}\n\nUser: {message}\nAssistant:"
+    # In older Gradio, history is a list of lists: [[user, bot], [user, bot]]
+    # We just need the current message and the search toggle
+
+    context = ""
+    if search_enabled:
+        print(f"Searching web for: {message}")
+        context = web_search(message)
+
+    prompt = f"System: Use context to help.\nContext: {context}\n\nUser: {message}\nAssistant:"
 
     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
 
     gen_kwargs = dict(
-        **inputs, streamer=streamer, max_new_tokens=int(tokens),
-        do_sample=True, temperature=float(temp), top_p=0.9,
+        **inputs,
+        streamer=streamer,
+        max_new_tokens=int(tokens),
+        do_sample=True,
+        temperature=float(temp),
+        top_p=0.9,
     )
 
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
@@ -60,17 +65,22 @@ def generate(message, history, search_enabled, tokens, temp):
         response += text
         yield response
 
-# --- STEP 4: UI SETUP ---
-print("--- [5/5] Launching Gradio UI... ---")
+print("--- [4/5] Building Interface ---")
+
+# Removed 'type' argument to ensure compatibility with Gradio 4
 demo = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
-        gr.Checkbox(label="Search Web", value=True),
-        gr.Slider(128, 1024, 512, label="Max Tokens"),
-        gr.Slider(0.1, 1.2, 0.7, label="Temp"),
+        gr.Checkbox(label="Enable Web Search", value=True),
+        gr.Slider(128, 1024, 512, step=64, label="Max New Tokens"),
+        gr.Slider(0.1, 1.2, 0.7, step=0.1, label="Temperature"),
     ],
-    type="messages"
+    title="Gemma 3 Web Search Bot",
+    theme="soft"
 )
 
+print("--- [5/5] Launching! ---")
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0")
+    # If share=True fails, OrbitMC might not allow tunnels.
+    # Try with it first, then remove if it crashes.
+    demo.launch(server_name="0.0.0.0", share=True)
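Note on the streaming path this commit keeps: model.generate() must run on a worker thread while the foreground loop drains the TextIteratorStreamer, since iterating the streamer without a producer thread would block forever. A minimal standalone sketch of that pattern, assuming the same model as above (the prompt and max_new_tokens values here are illustrative, not from the commit):

from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

MODEL_ID = "google/gemma-3-270m-it"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cpu")

inputs = tokenizer("User: hello\nAssistant:", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until decoding finishes, so it runs on a background
# thread; the streamer is an iterator the main thread consumes chunk by chunk.
thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=64))
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()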
 
 
 
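On the removed type="messages" argument: Gradio 4's ChatInterface defaults to passing history to fn as [user, assistant] pairs, while type="messages" switches it to role/content dicts, and generate() above ignores history either way. If history were ever folded into the prompt, a helper tolerant of both shapes might look like this (hypothetical helper, not part of the commit):

def history_to_text(history):
    # Accepts both Gradio history shapes:
    #   [[user, assistant], ...]                classic pair format
    #   [{"role": ..., "content": ...}, ...]    type="messages" format
    lines = []
    for item in history:
        if isinstance(item, dict):
            lines.append(f"{item['role'].capitalize()}: {item['content']}")
        else:
            user, assistant = item
            lines.append(f"User: {user}\nAssistant: {assistant}")
    return "\n".join(lines)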
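The web_search() helper can be smoke-tested on its own: duckduckgo_search yields each hit as a dict with 'title', 'href', and 'body' keys, which is what the f-string in web_search() indexes. A quick check, with an illustrative query:

from duckduckgo_search import DDGS

with DDGS() as ddgs:
    for r in ddgs.text("gradio chat interface", max_results=3):
        print(r["href"])           # source URL fed into the prompt context
        print(r["body"][:120])     # snippet text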