OrbitMC committed 57b7ba8 (verified) · 1 parent: f71bbec

Update app.py

Files changed (1)
  1. app.py  +34 -47
app.py CHANGED
@@ -9,78 +9,65 @@ from duckduckgo_search import DDGS
 HF_TOKEN = os.getenv('HF_TOKEN')
 MODEL_ID = "google/gemma-3-270m-it"
 
-print("--- [1/5] Initializing ---")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
-
-print("--- [2/5] Loading Model ---")
+print("--- [1] INITIALIZING ---")
+torch.set_num_threads(1)  # Keeps the CPU from redlining
+
+# Load Tokenizer & Model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="cpu",
-    dtype=torch.float32,
+    torch_dtype=torch.bfloat16,  # Saves 500MB of RAM vs float32
     low_cpu_mem_usage=True,
     trust_remote_code=True,
     token=HF_TOKEN
 )
-print("--- [3/5] Model Loaded! ---")
-
-# Optimize for CPU
-torch.set_num_threads(2)
+print("--- [2] MODEL LOADED SUCCESSFULLY ---")
 
-def web_search(query):
+def search_the_web(query):
+    """Safe search helper."""
     try:
         with DDGS() as ddgs:
-            results = [f"Source: {r['href']}\n{r['body']}" for r in ddgs.text(query, max_results=3)]
-            return "\n\n".join(results)
+            return "\n".join([r['body'] for r in list(ddgs.text(query, max_results=2))])
     except:
-        return "Search currently unavailable."
+        return ""
 
-def generate(message, history, search_enabled, tokens, temp):
-    # In older Gradio, history is a list of lists: [[user, bot], [user, bot]]
-    # We just need the current message and the search toggle
-
-    context = ""
-    if search_enabled:
-        print(f"Searching web for: {message}")
-        context = web_search(message)
-
-    prompt = f"System: Use context to help.\nContext: {context}\n\nUser: {message}\nAssistant:"
-
-    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
-    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+def chat_function(message, history, search_on, tokens, temp):
+    """The core generation loop."""
+    context = search_the_web(message) if search_on else ""
+    full_prompt = f"Context: {context}\n\nUser: {message}\nAssistant:"
+
+    inputs = tokenizer(full_prompt, return_tensors="pt").to("cpu")
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
     gen_kwargs = dict(
-        **inputs,
+        input_ids=inputs["input_ids"],
         streamer=streamer,
         max_new_tokens=int(tokens),
         do_sample=True,
         temperature=float(temp),
-        top_p=0.9,
     )
 
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
 
-    response = ""
-    for text in streamer:
-        response += text
-        yield response
-
-print("--- [4/5] Building Interface ---")
+    output = ""
+    for new_text in streamer:
+        output += new_text
+        yield output
 
-# Removed 'type' argument to ensure compatibility with Gradio 4
+# --- THE INTERFACE (SAFE VERSION) ---
+print("--- [3] BUILDING UI ---")
 demo = gr.ChatInterface(
-    fn=generate,
+    fn=chat_function,
     additional_inputs=[
-        gr.Checkbox(label="Enable Web Search", value=True),
-        gr.Slider(128, 1024, 512, step=64, label="Max New Tokens"),
-        gr.Slider(0.1, 1.2, 0.7, step=0.1, label="Temperature"),
-    ],
-    title="Gemma 3 Web Search Bot",
-    theme="soft"
+        gr.Checkbox(label="Web Search", value=True),
+        gr.Slider(128, 1024, 512, label="Length"),
+        gr.Slider(0.1, 1.2, 0.7, label="Chaos Level"),
+    ]
 )
 
-print("--- [5/5] Launching! ---")
 if __name__ == "__main__":
-    # If share=True fails, OrbitMC might not allow tunnels.
-    # Try with it first, then remove if it crashes.
-    demo.launch(server_name="0.0.0.0", share=True)
+    print("--- [4] LAUNCHING (ON PORT 7860) ---")
+    # share=True is removed because it causes loops on OrbitMC/Restricted hosts
+    demo.launch(server_name="0.0.0.0")
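
For reference, a minimal sketch of the threaded streaming pattern the new chat_function relies on: model.generate blocks until decoding finishes, so it runs on a worker thread while the TextIteratorStreamer is consumed on the caller's side. This assumes the model and tokenizer already loaded in app.py; the prompt and token budget are illustrative.

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(prompt, max_new_tokens=64):
    # Tokenize on CPU, same as app.py
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it on a background thread
    Thread(target=model.generate, kwargs=dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        streamer=streamer,
        max_new_tokens=max_new_tokens,
    )).start()
    text = ""
    for chunk in streamer:  # yields decoded text pieces as they are produced
        text += chunk
        yield text

for partial in stream_reply("User: hello\nAssistant:"):
    print(partial)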
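
The search helper can also be checked in isolation: DDGS.text() from duckduckgo_search yields dicts with 'title', 'href', and 'body' keys, and search_the_web keeps only 'body'. A quick probe (network access required, query is illustrative):

from duckduckgo_search import DDGS

with DDGS() as ddgs:
    hits = list(ddgs.text("gemma 3 270m", max_results=2))

for r in hits:
    print(r["href"])
    print(r["body"][:100])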
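
Since chat_function is a plain generator, it can be smoke-tested without the Gradio UI. ChatInterface calls it with (message, history) followed by the additional_inputs values in declaration order (checkbox, length slider, temperature slider); the arguments below are illustrative.

last = ""
for last in chat_function("What is Gemma 3 270M?", [], False, 64, 0.7):
    pass  # each iteration yields the accumulated reply so far
print(last)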