Update app.py

app.py CHANGED
@@ -1,215 +1,148 @@

Old version (lines marked - were removed or changed; some lines are truncated in the diff view):
 import time
 import gradio as gr
-from
 from duckduckgo_search import DDGS
-import json
 
-# ---
 
-
 
 def search_web(query):
     try:
         with DDGS() as ddgs:
-            results = [r for r in ddgs.text(query, max_results=
-            if not results:
-
-            context = "\n".join([f"Source: {r['title']}\nContent: {r['body']}" for r in results])
-            return context
     except Exception as e:
         print(f"Search error: {e}")
         return None
 
 def format_time(seconds_float):
-
-    m, s = divmod(
     h, m = divmod(m, 60)
     return f"{h}h {m}m {s}s" if h > 0 else f"{m}m {s}s" if m > 0 else f"{s}s"
 
 class ParserState:
-    __slots__ = ['answer', 'thought', 'in_think', 'start_time', 'last_pos', 'total_think_time']
     def __init__(self):
         self.answer = ""
         self.thought = ""
         self.in_think = False
         self.start_time = 0
-        self.last_pos = 0
         self.total_think_time = 0.0
 
-def parse_response(text, state):
-    buffer = text[state.last_pos:]
-    state.last_pos = len(text)
-    while buffer:
-        if not state.in_think:
-            think_start = buffer.find('<think>')
-            if think_start != -1:
-                state.answer += buffer[:think_start]
-                state.in_think = True
-                state.start_time = time.perf_counter()
-                buffer = buffer[think_start + 7:]
-            else:
-                state.answer += buffer
-                break
-        else:
-            think_end = buffer.find('</think>')
-            if think_end != -1:
-                state.thought += buffer[:think_end]
-                state.total_think_time += (time.perf_counter() - state.start_time)
-                state.in_think = False
-                buffer = buffer[think_end + 8:]
-            else:
-                state.thought += buffer
-                break
-    return state
-
 def format_ui_response(state):
     collapsible = ""
     if state.thought or state.in_think:
         status = f"🧠 Thinking ({format_time(state.total_think_time)})" if state.in_think else f"✅ Thought for {format_time(state.total_think_time)}"
         open_tag = "open" if state.in_think else ""
-        collapsible = f"<details {open_tag}><summary>{status}</summary><div
     return f"{collapsible}\n\n{state.answer}"
 
-# --- Gradio
-
-def user_msg(user_input, history):
-    return "", history + [[user_input, None]]
 
 def generate_response(history, search_enabled, temp, top_p, max_tok, active_gen):
     if not history: return history
 
     query = history[-1][0]
-
 
-    # Perform Search if enabled
     if search_enabled:
         history[-1][1] = "🔍 Searching the web..."
         yield history
-
-        if
-
-
-            history[-1][1] = "🚫 No search results found. Proceeding with internal knowledge..."
-            yield history
-
-    messages = [{"role": "user", "content": full_prompt}]
     state = ParserState()
-
-
     try:
-
-
-            messages=
             temperature=temp,
             top_p=top_p,
             max_tokens=max_tok,
             stream=True
         )
-
         for chunk in stream:
             if not active_gen[0]: break
-
-
-
             history[-1][1] = format_ui_response(state)
             yield history
     except Exception as e:
         history[-1][1] = f"Error: {str(e)}"
         yield history
-
-
-
-
-
-CSS = """
-.thinking-container {
-    border-left: 3px solid #facc15;
-    padding: 10px;
-    margin: 5px 0;
-    background: rgba(250, 204, 21, 0.05);
-    font-style: italic;
-    color: #666;
-}
-details {
-    border: 1px solid #ddd;
-    border-radius: 8px;
-    padding: 5px 10px;
-    margin-bottom: 10px;
-}
-summary {
-    cursor: pointer;
-    font-weight: bold;
-    color: #444;
-}
-"""
-
-with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
     active_gen = gr.State([False])
 
     with gr.Row():
-        with gr.Column(scale=
-            gr.
-        with gr.Column(scale=
-            search_toggle = gr.Checkbox(label="🌐
 
-    chatbot = gr.Chatbot(height=550, show_label=False, bubble_full_width=False)
-
     with gr.Row():
-        msg = gr.Textbox(
-            placeholder="Ask me anything...",
-            container=False,
-            scale=7
-        )
         submit_btn = gr.Button("Send", variant="primary", scale=1)
 
-
-
-
-        undo_btn = gr.Button("↩️ Undo", variant="secondary")
-        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
-
-    with gr.Accordion("Advanced Settings", open=False):
-        with gr.Row():
-            temp = gr.Slider(0.1, 1.5, 0.6, label="Temperature")
-            top_p = gr.Slider(0.1, 1.0, 0.95, label="Top-p")
-            max_tok = gr.Slider(1024, 32768, 4096, step=128, label="Max Tokens")
-
-    # Functions
-    def start_gen(): return [True]
-    def stop_gen(): return [False]
-    def undo(history):
-        if len(history) > 0: history.pop()
-        return history
-
-    submit_event = submit_btn.click(
-        user_msg, [msg, chatbot], [msg, chatbot], queue=False
-    ).then(
-        start_gen, None, active_gen
     ).then(
-        generate_response, [chatbot, search_toggle, temp,
     )
-
     msg.submit(
-
-    ).then(
-        start_gen, None, active_gen
-    ).then(
-        generate_response, [chatbot, search_toggle, temp, top_p, max_tok, active_gen], chatbot
-    )
-
-    regen_btn.click(
-        lambda history: (history[-1][0], history[:-1]), [chatbot], [msg, chatbot], queue=False
-    ).then(
-        user_msg, [msg, chatbot], [msg, chatbot], queue=False
-    ).then(
-        start_gen, None, active_gen
     ).then(
-        generate_response, [chatbot, search_toggle, temp,
     )
 
-    stop_btn.click(
-
-    clear_btn.click(lambda: None, None, chatbot)
 
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

New version (lines marked + were added or changed):
 import time
 import gradio as gr
+from llama_cpp import Llama
 from duckduckgo_search import DDGS
 
+# --- Initialize Model ---
+print("Loading model from Hugging Face...")
+llm = Llama.from_pretrained(
+    repo_id="unsloth/Qwen3-0.6B-GGUF",
+    filename="Qwen3-0.6B-BF16.gguf",
+    n_ctx=32768,
+    n_threads=None,  # Automatically use all CPU cores
+    verbose=False
+)
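Note: `Llama.from_pretrained` resolves the GGUF file through the Hugging Face Hub cache. A minimal sketch of the equivalent explicit two-step, assuming `huggingface_hub` is available (this helper depends on it anyway):

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized weights (or reuse the local cache), then load directly.
model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-0.6B-GGUF",
    filename="Qwen3-0.6B-BF16.gguf",
)
llm = Llama(model_path=model_path, n_ctx=32768, verbose=False)
```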
 
+# --- Logic Functions ---
 
 def search_web(query):
     try:
         with DDGS() as ddgs:
+            results = [r for r in ddgs.text(query, max_results=3)]
+            if not results: return None
+            return "\n".join([f"Source: {r['title']}\nContent: {r['body']}" for r in results])
     except Exception as e:
         print(f"Search error: {e}")
         return None
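Note: `search_web` flattens the top three DuckDuckGo hits into a single prompt-ready string; `duckduckgo_search` result dicts carry `title`, `href`, and `body` keys. A quick smoke test, assuming it runs in the context of this app.py (the query string is illustrative):

```python
ctx = search_web("llama.cpp GGUF quantization")
print(ctx[:300] if ctx else "no results or network error")
```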
 
 def format_time(seconds_float):
+    ts = int(round(seconds_float))
+    m, s = divmod(ts, 60)
     h, m = divmod(m, 60)
     return f"{h}h {m}m {s}s" if h > 0 else f"{m}m {s}s" if m > 0 else f"{s}s"
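Note: `format_time` rounds to whole seconds and drops leading zero-valued units, e.g.:

```python
assert format_time(42.4) == "42s"
assert format_time(75) == "1m 15s"
assert format_time(3700.0) == "1h 1m 40s"
```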
 
 class ParserState:
     def __init__(self):
         self.answer = ""
         self.thought = ""
         self.in_think = False
         self.start_time = 0
         self.total_think_time = 0.0
 
 def format_ui_response(state):
     collapsible = ""
     if state.thought or state.in_think:
         status = f"🧠 Thinking ({format_time(state.total_think_time)})" if state.in_think else f"✅ Thought for {format_time(state.total_think_time)}"
         open_tag = "open" if state.in_think else ""
+        collapsible = f"<details {open_tag}><summary>{status}</summary><div style='color: #666; font-style: italic; border-left: 3px solid #facc15; padding-left: 10px; background: rgba(0,0,0,0.02);'>{state.thought}</div></details>"
     return f"{collapsible}\n\n{state.answer}"
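Note: `gr.Chatbot` renders the returned HTML, so a mid-thought message displays as an expanded collapsible block. Roughly, assuming the classes above and abbreviating the style attribute:

```python
s = ParserState()
s.in_think, s.thought = True, "Step 1: recall the formula..."
print(format_ui_response(s))
# <details open><summary>🧠 Thinking (0s)</summary><div style='...'>Step 1: recall the formula...</div></details>
```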
 
+# --- Gradio Handlers ---
 
 def generate_response(history, search_enabled, temp, top_p, max_tok, active_gen):
     if not history: return history
 
     query = history[-1][0]
+    prompt = query
 
     if search_enabled:
         history[-1][1] = "🔍 Searching the web..."
         yield history
+        context = search_web(query)
+        if context:
+            prompt = f"Context from Web:\n{context}\n\nUser Question: {query}\n\nAnswer using the context above:"
+
     state = ParserState()
+    active_gen[0] = True
+
     try:
+        # llama-cpp-python streaming completion
+        stream = llm.create_chat_completion(
+            messages=[{"role": "user", "content": prompt}],
             temperature=temp,
             top_p=top_p,
             max_tokens=max_tok,
             stream=True
         )
+
         for chunk in stream:
             if not active_gen[0]: break
+
+            delta = chunk['choices'][0]['delta']
+            if 'content' in delta:
+                token = delta['content']
+
+                # Logic to handle <think> tags
+                if "<think>" in token:
+                    state.in_think = True
+                    state.start_time = time.perf_counter()
+                    token = token.replace("<think>", "")
+
+                if "</think>" in token:
+                    state.total_think_time = time.perf_counter() - state.start_time
+                    state.in_think = False
+                    token = token.replace("</think>", "")
+
+                if state.in_think:
+                    state.thought += token
+                    state.total_think_time = time.perf_counter() - state.start_time
+                else:
+                    state.answer += token
+
             history[-1][1] = format_ui_response(state)
             yield history
+
     except Exception as e:
         history[-1][1] = f"Error: {str(e)}"
         yield history
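Note: the loop relies on the OpenAI-style chunks that `llama-cpp-python` yields from `create_chat_completion(stream=True)`: the first chunk's `delta` holds only the role, later ones hold `content`, hence the `if 'content' in delta` guard. A minimal standalone consumer of the same stream shape, assuming the `llm` instance above:

```python
stream = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hi"}],
    max_tokens=16,
    stream=True,
)
text = ""
for chunk in stream:
    delta = chunk["choices"][0]["delta"]  # first chunk: {'role': 'assistant'}
    text += delta.get("content", "")      # later chunks carry token text
print(text)
```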
+
+# --- UI Layout ---
+
+with gr.Blocks(theme=gr.themes.Soft(), css="footer {visibility: hidden}") as demo:
     active_gen = gr.State([False])
 
+    gr.Markdown("# 🚀 Qwen3 Reasoning Engine\n*Integrated Llama-CPP with Web Search*")
+
     with gr.Row():
+        with gr.Column(scale=4):
+            chatbot = gr.Chatbot(height=500, show_label=False, bubble_full_width=False)
+        with gr.Column(scale=1):
+            search_toggle = gr.Checkbox(label="🌐 Web Search", value=False)
+            temp = gr.Slider(0.1, 1.2, 0.7, label="Temperature")
+            max_tok = gr.Slider(512, 8192, 2048, step=128, label="Max Tokens")
+            gr.Markdown("---")
+            stop_btn = gr.Button("⏹ Stop", variant="secondary")
+            clear_btn = gr.Button("🗑️ Clear", variant="secondary")
 
     with gr.Row():
+        msg = gr.Textbox(placeholder="Enter your prompt here...", container=False, scale=7)
         submit_btn = gr.Button("Send", variant="primary", scale=1)
 
+    # Event Wiring
+    sub_ev = submit_btn.click(
+        lambda m, h: ("", h + [[m, None]]), [msg, chatbot], [msg, chatbot], queue=False
     ).then(
+        generate_response, [chatbot, search_toggle, temp, gr.State(0.95), max_tok, active_gen], chatbot
     )
+
     msg.submit(
+        lambda m, h: ("", h + [[m, None]]), [msg, chatbot], [msg, chatbot], queue=False
     ).then(
+        generate_response, [chatbot, search_toggle, temp, gr.State(0.95), max_tok, active_gen], chatbot
     )
 
+    stop_btn.click(lambda: [False], None, active_gen, cancels=[sub_ev])
+    clear_btn.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)