OrbitMC committed on
Commit
b673820
Β·
verified Β·
1 Parent(s): 6cf0909

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -127
app.py CHANGED
@@ -1,148 +1,82 @@
1
- import time
2
  import gradio as gr
3
- from llama_cpp import Llama
 
4
  from duckduckgo_search import DDGS
5
-
6
- # --- Initialize Model ---
7
- print("Loading model from Hugging Face...")
8
- llm = Llama.from_pretrained(
9
- repo_id="unsloth/Qwen3-0.6B-GGUF",
10
- filename="Qwen3-0.6B-BF16.gguf",
11
- n_ctx=32768,
12
- n_threads=None, # Automatically use all CPU cores
13
- verbose=False
 
 
 
14
  )
15
 
16
- # --- Logic Functions ---
17
-
18
  def search_web(query):
19
  try:
20
  with DDGS() as ddgs:
21
  results = [r for r in ddgs.text(query, max_results=3)]
22
- if not results: return None
23
- return "\n".join([f"Source: {r['title']}\nContent: {r['body']}" for r in results])
 
24
  except Exception as e:
25
  print(f"Search error: {e}")
26
- return None
27
-
28
- def format_time(seconds_float):
29
- ts = int(round(seconds_float))
30
- m, s = divmod(ts, 60)
31
- h, m = divmod(m, 60)
32
- return f"{h}h {m}m {s}s" if h > 0 else f"{m}m {s}s" if m > 0 else f"{s}s"
33
-
34
- class ParserState:
35
- def __init__(self):
36
- self.answer = ""
37
- self.thought = ""
38
- self.in_think = False
39
- self.start_time = 0
40
- self.total_think_time = 0.0
41
-
42
- def format_ui_response(state):
43
- collapsible = ""
44
- if state.thought or state.in_think:
45
- status = f"πŸŒ€ Thinking ({format_time(state.total_think_time)})" if state.in_think else f"βœ… Thought for {format_time(state.total_think_time)}"
46
- open_tag = "open" if state.in_think else ""
47
- collapsible = f"<details {open_tag}><summary>{status}</summary><div style='color: #666; font-style: italic; border-left: 3px solid #facc15; padding-left: 10px; background: rgba(0,0,0,0.02);'>{state.thought}</div></details>"
48
- return f"{collapsible}\n\n{state.answer}"
49
-
50
- # --- Gradio Handlers ---
51
-
52
- def generate_response(history, search_enabled, temp, top_p, max_tok, active_gen):
53
- if not history: return history
54
-
55
- query = history[-1][0]
56
- prompt = query
57
 
 
 
 
 
58
  if search_enabled:
59
- history[-1][1] = "πŸ” Searching the web..."
60
- yield history
61
- context = search_web(query)
62
- if context:
63
- prompt = f"Context from Web:\n{context}\n\nUser Question: {query}\n\nAnswer using the context above:"
64
-
65
- state = ParserState()
66
- active_gen[0] = True
67
-
68
- try:
69
- # llama-cpp-python streaming completion
70
- stream = llm.create_chat_completion(
71
- messages=[{"role": "user", "content": prompt}],
72
- temperature=temp,
73
- top_p=top_p,
74
- max_tokens=max_tok,
75
- stream=True
76
- )
77
 
78
- for chunk in stream:
79
- if not active_gen[0]: break
80
-
81
- delta = chunk['choices'][0]['delta']
82
- if 'content' in delta:
83
- token = delta['content']
84
-
85
- # Logic to handle <think> tags
86
- if "<think>" in token:
87
- state.in_think = True
88
- state.start_time = time.perf_counter()
89
- token = token.replace("<think>", "")
90
-
91
- if "</think>" in token:
92
- state.total_think_time += (time.perf_counter() - state.start_time)
93
- state.in_think = False
94
- token = token.replace("</think>", "")
95
-
96
- if state.in_think:
97
- state.thought += token
98
- state.total_think_time = time.perf_counter() - state.start_time
99
- else:
100
- state.answer += token
101
-
102
- history[-1][1] = format_ui_response(state)
103
- yield history
104
-
105
- except Exception as e:
106
- history[-1][1] = f"Error: {str(e)}"
107
- yield history
108
 
109
- # --- UI Layout ---
 
 
 
 
 
110
 
111
- with gr.Blocks(theme=gr.themes.Soft(), css="footer {visibility: hidden}") as demo:
112
- active_gen = gr.State([False])
113
-
114
- gr.Markdown("# πŸš€ Qwen3 Reasoning Engine\n*Integrated Llama-CPP with Web Search*")
115
 
116
  with gr.Row():
117
  with gr.Column(scale=4):
118
- chatbot = gr.Chatbot(height=500, show_label=False, bubble_full_width=False)
119
- with gr.Column(scale=1):
120
- search_toggle = gr.Checkbox(label="🌐 Web Search", value=False)
121
- temp = gr.Slider(0.1, 1.2, 0.7, label="Temperature")
122
- max_tok = gr.Slider(512, 8192, 2048, step=128, label="Max Tokens")
123
- gr.Markdown("---")
124
- stop_btn = gr.Button("⏹ Stop", variant="secondary")
125
- clear_btn = gr.Button("πŸ—‘ Clear", variant="secondary")
126
-
127
- with gr.Row():
128
- msg = gr.Textbox(placeholder="Enter your prompt here...", container=False, scale=7)
129
- submit_btn = gr.Button("Send", variant="primary", scale=1)
130
-
131
- # Event Wiring
132
- sub_ev = submit_btn.click(
133
- lambda m, h: ("", h + [[m, None]]), [msg, chatbot], [msg, chatbot], queue=False
134
- ).then(
135
- generate_response, [chatbot, search_toggle, temp, gr.State(0.95), max_tok, active_gen], chatbot
136
- )
137
-
138
- msg.submit(
139
- lambda m, h: ("", h + [[m, None]]), [msg, chatbot], [msg, chatbot], queue=False
140
- ).then(
141
- generate_response, [chatbot, search_toggle, temp, gr.State(0.95), max_tok, active_gen], chatbot
142
- )
143
-
144
- stop_btn.click(lambda: [False], None, active_gen, cancels=[sub_ev])
145
- clear_btn.click(lambda: None, None, chatbot, queue=False)
146
 
147
  if __name__ == "__main__":
148
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
1
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from duckduckgo_search import DDGS
from threading import Thread

# --- MODEL CONFIG ---
# Hugging Face repo id of the checkpoint to serve; change here to swap models.
MODEL_ID = "Qwen/Qwen3-0.6B" # Pure HF Datacard

print(f"Loading model {MODEL_ID}...")
# Model and tokenizer are loaded once at import time so every Gradio request
# reuses the same in-memory weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",          # let accelerate place weights (GPU if present)
    torch_dtype=torch.float16,  # half precision to reduce memory footprint
    low_cpu_mem_usage=True      # avoid materializing a full fp32 copy in RAM
)
18
 
19
+ # --- WEB SEARCH ---
 
20
def search_web(query):
    """Fetch up to three DuckDuckGo results for *query*.

    Returns a formatted context block ready to splice into the model prompt,
    or "" when there are no hits or the lookup fails (best-effort: any
    exception is logged and swallowed so chat keeps working offline).
    """
    try:
        with DDGS() as ddgs:
            hits = list(ddgs.text(query, max_results=3))
            if not hits:
                return ""
            body = "\n".join(
                f"Source: {hit['title']}\nContent: {hit['body']}" for hit in hits
            )
            return f"\n\nWeb Search Context:\n{body}\n"
    except Exception as e:
        print(f"Search error: {e}")
        return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
# --- INFERENCE ---
def stream_response(message, history, search_enabled, temperature, max_new_tokens):
    """Generate a streamed reply for gr.ChatInterface.

    Args:
        message: Latest user message.
        history: Chat history supplied by gr.ChatInterface (currently unused;
            the prompt is built from the current message only).
        search_enabled: When True, prepend DuckDuckGo context to the prompt.
        temperature: Sampling temperature passed to model.generate.
        max_new_tokens: Upper bound on generated tokens.

    Yields:
        The accumulated response text after each streamed fragment.
    """
    # Optionally enrich the prompt with web search context.
    context = ""
    if search_enabled:
        context = search_web(message)

    # NOTE(review): a plain "User:/Assistant:" template is used instead of
    # tokenizer.apply_chat_template; kept as-is to preserve behavior, but
    # Qwen chat checkpoints usually expect their own template — confirm.
    full_prompt = f"User: {message}{context}\nAssistant:"

    inputs = tokenizer([full_prompt], return_tensors="pt").to(model.device)
    # skip_prompt: don't echo the input; skip_special_tokens: drop EOS etc.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        inputs,  # expands to input_ids / attention_mask kwargs
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id
    )

    # Run generate() on a worker thread so we can consume the streamer here.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        # Surface the model's <think>...</think> span as visible markdown.
        new_text = new_text.replace("<think>", "💭 *Thinking:* ").replace("</think>", "\n\n---\n\n")
        partial_text += new_text
        yield partial_text

    # Fix: the original never joined the worker. The streamer is exhausted
    # only once generate() finishes, so this join returns immediately but
    # guarantees the thread is reaped before the handler exits.
    thread.join()
62
 
63
# --- CLEAN UI ---
# Top-level Gradio layout; `demo` is launched by the __main__ guard below.
with gr.Blocks(theme=gr.themes.Default(primary_hue="orange", secondary_hue="gray")) as demo:
    gr.Markdown("# 🛸 Qwen3 Pure-Python Explorer")

    with gr.Row():
        with gr.Column(scale=4):
            # ChatInterface streams stream_response's generator straight into
            # the chat; additional_inputs map 1:1 onto stream_response's
            # trailing parameters (search_enabled, temperature, max_new_tokens).
            chatbot = gr.ChatInterface(
                fn=stream_response,
                additional_inputs=[
                    gr.Checkbox(label="🌐 Enable Web Search", value=False),
                    gr.Slider(0.1, 1.0, 0.7, label="Temperature"),
                    gr.Slider(128, 4096, 1024, label="Max Tokens"),
                ],
                fill_height=True
            )

    # NOTE(review): the rendered diff loses indentation, so this banner's
    # nesting (Blocks level vs. inside the Row) is ambiguous — kept at Blocks
    # level; confirm against the live app.
    gr.Markdown("### Features:\n- ✅ **Zero C++ / Zero llama-cpp**\n- ✅ **Native HuggingFace Transformers**\n- ✅ **DuckDuckGo Integration**")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
if __name__ == "__main__":
    # Bind to all interfaces on 7860 (the conventional Hugging Face Spaces port).
    demo.launch(server_name="0.0.0.0", server_port=7860)