OrbitMC commited on
Commit
e7b5bc2
Β·
verified Β·
1 Parent(s): b796c25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -144
app.py CHANGED
@@ -1,215 +1,148 @@
1
  import time
2
  import gradio as gr
3
- from openai import OpenAI
4
  from duckduckgo_search import DDGS
5
- import json
6
 
7
- # --- Logic Functions ---
 
 
 
 
 
 
 
 
8
 
9
- client = OpenAI(base_url="http://localhost:8080/v1", api_key="no-key-required")
10
 
11
  def search_web(query):
12
  try:
13
  with DDGS() as ddgs:
14
- results = [r for r in ddgs.text(query, max_results=5)]
15
- if not results:
16
- return None
17
- context = "\n".join([f"Source: {r['title']}\nContent: {r['body']}" for r in results])
18
- return context
19
  except Exception as e:
20
  print(f"Search error: {e}")
21
  return None
22
 
23
  def format_time(seconds_float):
24
- total_seconds = int(round(seconds_float))
25
- m, s = divmod(total_seconds, 60)
26
  h, m = divmod(m, 60)
27
  return f"{h}h {m}m {s}s" if h > 0 else f"{m}m {s}s" if m > 0 else f"{s}s"
28
 
29
  class ParserState:
30
- __slots__ = ['answer', 'thought', 'in_think', 'start_time', 'last_pos', 'total_think_time']
31
  def __init__(self):
32
  self.answer = ""
33
  self.thought = ""
34
  self.in_think = False
35
  self.start_time = 0
36
- self.last_pos = 0
37
  self.total_think_time = 0.0
38
 
39
- def parse_response(text, state):
40
- buffer = text[state.last_pos:]
41
- state.last_pos = len(text)
42
- while buffer:
43
- if not state.in_think:
44
- think_start = buffer.find('<think>')
45
- if think_start != -1:
46
- state.answer += buffer[:think_start]
47
- state.in_think = True
48
- state.start_time = time.perf_counter()
49
- buffer = buffer[think_start + 7:]
50
- else:
51
- state.answer += buffer
52
- break
53
- else:
54
- think_end = buffer.find('</think>')
55
- if think_end != -1:
56
- state.thought += buffer[:think_end]
57
- state.total_think_time += (time.perf_counter() - state.start_time)
58
- state.in_think = False
59
- buffer = buffer[think_end + 8:]
60
- else:
61
- state.thought += buffer
62
- break
63
- return state
64
-
65
  def format_ui_response(state):
66
  collapsible = ""
67
  if state.thought or state.in_think:
68
  status = f"πŸŒ€ Thinking ({format_time(state.total_think_time)})" if state.in_think else f"βœ… Thought for {format_time(state.total_think_time)}"
69
  open_tag = "open" if state.in_think else ""
70
- collapsible = f"<details {open_tag}><summary>{status}</summary><div class='thinking-container'>{state.thought}</div></details>"
71
  return f"{collapsible}\n\n{state.answer}"
72
 
73
- # --- Gradio UI Handlers ---
74
-
75
- def user_msg(user_input, history):
76
- return "", history + [[user_input, None]]
77
 
78
  def generate_response(history, search_enabled, temp, top_p, max_tok, active_gen):
79
  if not history: return history
80
 
81
  query = history[-1][0]
82
- full_prompt = query
83
 
84
- # Perform Search if enabled
85
  if search_enabled:
86
  history[-1][1] = "πŸ” Searching the web..."
87
  yield history
88
- search_results = search_web(query)
89
- if search_results:
90
- full_prompt = f"User Query: {query}\n\nWeb Search Results:\n{search_results}\n\nInstruction: Use the search results to provide a comprehensive answer."
91
- else:
92
- history[-1][1] = "πŸ” No search results found. Proceeding with internal knowledge..."
93
- yield history
94
-
95
- messages = [{"role": "user", "content": full_prompt}]
96
  state = ParserState()
97
- full_text = ""
98
-
99
  try:
100
- stream = client.chat.completions.create(
101
- model="local",
102
- messages=messages,
103
  temperature=temp,
104
  top_p=top_p,
105
  max_tokens=max_tok,
106
  stream=True
107
  )
108
-
109
  for chunk in stream:
110
  if not active_gen[0]: break
111
- if chunk.choices[0].delta.content:
112
- full_text += chunk.choices[0].delta.content
113
- state = parse_response(full_text, state)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  history[-1][1] = format_ui_response(state)
115
  yield history
 
116
  except Exception as e:
117
  history[-1][1] = f"Error: {str(e)}"
118
  yield history
119
- finally:
120
- active_gen[0] = False
121
-
122
- # --- UI Setup ---
123
-
124
- CSS = """
125
- .thinking-container {
126
- border-left: 3px solid #facc15;
127
- padding: 10px;
128
- margin: 5px 0;
129
- background: rgba(250, 204, 21, 0.05);
130
- font-style: italic;
131
- color: #666;
132
- }
133
- details {
134
- border: 1px solid #ddd;
135
- border-radius: 8px;
136
- padding: 5px 10px;
137
- margin-bottom: 10px;
138
- }
139
- summary {
140
- cursor: pointer;
141
- font-weight: bold;
142
- color: #444;
143
- }
144
- """
145
-
146
- with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
147
  active_gen = gr.State([False])
148
 
 
 
149
  with gr.Row():
150
- with gr.Column(scale=8):
151
- gr.Markdown("# 🧠 Qwen3 Web Explorer\n*High-speed reasoning with real-time web access*")
152
- with gr.Column(scale=2):
153
- search_toggle = gr.Checkbox(label="🌐 Enable Web Search", value=False)
 
 
 
 
 
154
 
155
- chatbot = gr.Chatbot(height=550, show_label=False, bubble_full_width=False)
156
-
157
  with gr.Row():
158
- msg = gr.Textbox(
159
- placeholder="Ask me anything...",
160
- container=False,
161
- scale=7
162
- )
163
  submit_btn = gr.Button("Send", variant="primary", scale=1)
164
 
165
- with gr.Row():
166
- stop_btn = gr.Button("⏹ Stop", variant="secondary")
167
- regen_btn = gr.Button("πŸ”„ Regenerate", variant="secondary")
168
- undo_btn = gr.Button("↩️ Undo", variant="secondary")
169
- clear_btn = gr.Button("πŸ—‘ Clear Chat", variant="secondary")
170
-
171
- with gr.Accordion("Advanced Settings", open=False):
172
- with gr.Row():
173
- temp = gr.Slider(0.1, 1.5, 0.6, label="Temperature")
174
- top_p = gr.Slider(0.1, 1.0, 0.95, label="Top-p")
175
- max_tok = gr.Slider(1024, 32768, 4096, step=128, label="Max Tokens")
176
-
177
- # Functions
178
- def start_gen(): return [True]
179
- def stop_gen(): return [False]
180
- def undo(history):
181
- if len(history) > 0: history.pop()
182
- return history
183
-
184
- submit_event = submit_btn.click(
185
- user_msg, [msg, chatbot], [msg, chatbot], queue=False
186
- ).then(
187
- start_gen, None, active_gen
188
  ).then(
189
- generate_response, [chatbot, search_toggle, temp, top_p, max_tok, active_gen], chatbot
190
  )
191
-
192
  msg.submit(
193
- user_msg, [msg, chatbot], [msg, chatbot], queue=False
194
- ).then(
195
- start_gen, None, active_gen
196
- ).then(
197
- generate_response, [chatbot, search_toggle, temp, top_p, max_tok, active_gen], chatbot
198
- )
199
-
200
- regen_btn.click(
201
- lambda history: (history[-1][0], history[:-1]), [chatbot], [msg, chatbot], queue=False
202
- ).then(
203
- user_msg, [msg, chatbot], [msg, chatbot], queue=False
204
- ).then(
205
- start_gen, None, active_gen
206
  ).then(
207
- generate_response, [chatbot, search_toggle, temp, top_p, max_tok, active_gen], chatbot
208
  )
209
 
210
- stop_btn.click(stop_gen, None, active_gen, cancels=[submit_event])
211
- undo_btn.click(undo, [chatbot], [chatbot])
212
- clear_btn.click(lambda: None, None, chatbot)
213
 
214
  if __name__ == "__main__":
215
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import time
2
  import gradio as gr
3
+ from llama_cpp import Llama
4
  from duckduckgo_search import DDGS
 
5
 
6
# --- Initialize Model ---
# Module-level, eager load: the model is fetched once at import time so the
# Gradio app never serves a request before weights are available.
print("Loading model from Hugging Face...")
llm = Llama.from_pretrained(
    # NOTE(review): from_pretrained pulls the GGUF file from the Hugging Face
    # Hub by repo/filename — presumably cached locally after the first run.
    repo_id="unsloth/Qwen3-0.6B-GGUF",
    filename="Qwen3-0.6B-BF16.gguf",
    n_ctx=32768,  # requested context window size (tokens)
    n_threads=None,  # Automatically use all CPU cores
    verbose=False
)
15
 
16
+ # --- Logic Functions ---
17
 
18
def search_web(query):
    """Run a DuckDuckGo text search and return a formatted context string.

    Args:
        query: Free-text search query.

    Returns:
        A newline-joined "Source: .../Content: ..." string built from up to
        3 results, or None when there are no results or the search fails.
        Callers treat None as "answer from internal knowledge instead".
    """
    try:
        with DDGS() as ddgs:
            # list(...) instead of a pass-through comprehension (ruff PERF402).
            results = list(ddgs.text(query, max_results=3))
            if not results:
                return None
            return "\n".join(
                f"Source: {r['title']}\nContent: {r['body']}" for r in results
            )
    except Exception as e:
        # Best-effort: a failed web search must never crash the chat turn.
        print(f"Search error: {e}")
        return None
27
 
28
def format_time(seconds_float):
    """Render a duration in seconds as a compact "Xh Ym Zs" string.

    The input is rounded to the nearest whole second; leading zero units
    are dropped (e.g. 65.0 -> "1m 5s", 4.2 -> "4s").
    """
    total_seconds = int(round(seconds_float))
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours > 0:
        return f"{hours}h {minutes}m {seconds}s"
    if minutes > 0:
        return f"{minutes}m {seconds}s"
    return f"{seconds}s"
33
 
34
class ParserState:
    """Mutable accumulator for a streamed model response.

    Separates the user-visible answer from hidden ``<think>`` content and
    tracks how long the model has spent inside think blocks.
    """

    def __init__(self):
        self.answer = ""  # text shown to the user
        self.thought = ""  # text captured between <think> ... </think>
        self.in_think = False  # True while a <think> block is still open
        self.start_time = 0  # perf_counter() timestamp of the last <think>
        self.total_think_time = 0.0  # seconds spent thinking so far
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
def format_ui_response(state):
    """Build the chat-bubble markup: optional collapsible think panel + answer.

    While thinking, the <details> panel is rendered open with a live timer;
    once closed, it collapses and shows the total thinking time.
    """
    panel = ""
    if state.thought or state.in_think:
        elapsed = format_time(state.total_think_time)
        if state.in_think:
            status = f"πŸŒ€ Thinking ({elapsed})"
            open_tag = "open"
        else:
            status = f"βœ… Thought for {elapsed}"
            open_tag = ""
        panel = (
            f"<details {open_tag}><summary>{status}</summary>"
            f"<div style='color: #666; font-style: italic; border-left: 3px solid #facc15; "
            f"padding-left: 10px; background: rgba(0,0,0,0.02);'>{state.thought}</div></details>"
        )
    return f"{panel}\n\n{state.answer}"
49
 
50
+ # --- Gradio Handlers ---
 
 
 
51
 
52
def generate_response(history, search_enabled, temp, top_p, max_tok, active_gen):
    """Stream the model's reply for the latest user turn, yielding UI updates.

    Args:
        history: Chatbot history as [[user, bot], ...]; last bot slot is filled in.
        search_enabled: When True, prepend DuckDuckGo results to the prompt.
        temp, top_p, max_tok: Sampling parameters forwarded to the model.
        active_gen: One-element list used as a mutable cancel flag; the Stop
            button sets active_gen[0] = False to abort the stream.

    Yields:
        The updated history after each streamed token (and search status).
    """
    if not history:
        return history

    query = history[-1][0]
    prompt = query

    if search_enabled:
        history[-1][1] = "πŸ” Searching the web..."
        yield history
        context = search_web(query)
        if context:
            prompt = (
                f"Context from Web:\n{context}\n\n"
                f"User Question: {query}\n\nAnswer using the context above:"
            )

    state = ParserState()
    active_gen[0] = True

    try:
        # llama-cpp-python streaming completion
        stream = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=temp,
            top_p=top_p,
            max_tokens=max_tok,
            stream=True
        )

        # Seconds accumulated over *completed* think blocks. Previously the
        # streaming branch assigned total_think_time = now - start_time,
        # clobbering earlier blocks, and the </think> branch then += the
        # elapsed time on top of that streamed value (double counting).
        completed_think = 0.0

        for chunk in stream:
            if not active_gen[0]:
                break

            delta = chunk['choices'][0]['delta']
            if 'content' in delta:
                token = delta['content']

                # Logic to handle <think> tags.
                # NOTE(review): if a single token contains "<think>x</think>",
                # "x" lands in the answer, matching the original behavior.
                if "<think>" in token:
                    state.in_think = True
                    state.start_time = time.perf_counter()
                    token = token.replace("<think>", "")

                if "</think>" in token:
                    completed_think += time.perf_counter() - state.start_time
                    state.total_think_time = completed_think
                    state.in_think = False
                    token = token.replace("</think>", "")

                if state.in_think:
                    state.thought += token
                    # Live timer: completed blocks plus the current one.
                    state.total_think_time = completed_think + (
                        time.perf_counter() - state.start_time
                    )
                else:
                    state.answer += token

                history[-1][1] = format_ui_response(state)
                yield history

    except Exception as e:
        # Surface model/stream failures in the chat bubble instead of crashing.
        history[-1][1] = f"Error: {str(e)}"
        yield history
108
+
109
# --- UI Layout ---

with gr.Blocks(theme=gr.themes.Soft(), css="footer {visibility: hidden}") as demo:
    active_gen = gr.State([False])  # mutable cancel flag shared with generate_response

    gr.Markdown("# πŸš€ Qwen3 Reasoning Engine\n*Integrated Llama-CPP with Web Search*")

    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(height=500, show_label=False, bubble_full_width=False)
        with gr.Column(scale=1):
            search_toggle = gr.Checkbox(label="🌐 Web Search", value=False)
            temp = gr.Slider(0.1, 1.2, 0.7, label="Temperature")
            max_tok = gr.Slider(512, 8192, 2048, step=128, label="Max Tokens")
            gr.Markdown("---")
            stop_btn = gr.Button("⏹ Stop", variant="secondary")
            clear_btn = gr.Button("πŸ—‘ Clear", variant="secondary")

    with gr.Row():
        msg = gr.Textbox(placeholder="Enter your prompt here...", container=False, scale=7)
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Event Wiring

    def _queue_user_turn(message, history):
        """Clear the textbox and append the user message with an empty bot slot."""
        return "", history + [[message, None]]

    # top_p is fixed at 0.95 (not user-tunable in this layout).
    gen_inputs = [chatbot, search_toggle, temp, gr.State(0.95), max_tok, active_gen]

    sub_ev = submit_btn.click(
        _queue_user_turn, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        generate_response, gen_inputs, chatbot
    )

    enter_ev = msg.submit(
        _queue_user_turn, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        generate_response, gen_inputs, chatbot
    )

    # Stop must cancel BOTH entry points; previously only the Send-button
    # chain was cancelled, so Enter-key generations could not be stopped.
    stop_btn.click(lambda: [False], None, active_gen, cancels=[sub_ev, enter_ev])
    clear_btn.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)