| | import gradio as gr |
| | import torch |
| | import re |
| | from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer |
| | from duckduckgo_search import DDGS |
| | from threading import Thread |
| |
|
| | |
# Hugging Face Hub identifier for the checkpoint; a small 0.6B-parameter chat model.
MODEL_ID = "Qwen/Qwen3-0.6B"
print("Loading model and tokenizer...")

# Module-level singletons shared by every chat request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",   # use the checkpoint's native dtype
    device_map="auto",    # place weights on GPU when available, else CPU
    trust_remote_code=True
)
| |
|
| | |
def web_search(query):
    """Fetch up to three DuckDuckGo text results for *query*.

    Returns a "Search Results" blob (one ``- title: body`` line per hit)
    suitable for appending to the user prompt, or an empty string when the
    search fails or yields nothing.
    """
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=3))
        if not results:
            return ""
        blob = "\n\nSearch Results:\n"
        for r in results:
            blob += f"- {r['title']}: {r['body']}\n"
        return blob
    # FIX: was a bare ``except:``, which also swallows KeyboardInterrupt and
    # SystemExit. Search stays best-effort: any failure degrades to "no
    # extra context" instead of crashing the chat turn.
    except Exception:
        return ""
| |
|
| | |
# Styles for the collapsible "thought" panel emitted by parse_output().
CSS = """
.thought-box {
    background-color: rgba(255, 255, 255, 0.05);
    border-left: 4px solid #facc15;
    padding: 10px;
    margin: 10px 0;
    font-style: italic;
    color: #9ca3af;
}
details summary {
    cursor: pointer;
    color: #facc15;
    font-weight: bold;
}
"""
| |
|
def parse_output(text):
    """Convert the model's <think>...</think> markup into collapsible HTML.

    While the reasoning is still streaming (no closing tag yet) the panel is
    labelled "Thinking..."; once the closing tag arrives, the panel shows the
    completed thought followed by the final answer. Text without a <think>
    tag passes through unchanged.
    """
    if "<think>" not in text:
        return text

    segments = text.split("</think>")
    reasoning = segments[0].replace("<think>", "").strip()

    if len(segments) > 1:
        # Closing tag seen: reasoning is complete and the answer follows it.
        final_answer = segments[1].strip()
        return (
            "<details open><summary>π Thought Process</summary>"
            f"<div class='thought-box'>{reasoning}</div></details>\n\n{final_answer}"
        )

    # No closing tag yet — the model is still mid-thought.
    return (
        "<details open><summary>π Thinking...</summary>"
        f"<div class='thought-box'>{reasoning}</div></details>"
    )
| |
|
| | |
def chat(message, history, search_enabled, temperature, max_tokens):
    """Stream a chat completion from the model as a Gradio generator.

    Args:
        message: The new user message.
        history: Gradio tuple-format history — list of [user, assistant] pairs.
        search_enabled: When True, append DuckDuckGo results to the prompt.
        temperature: Sampling temperature for generation.
        max_tokens: Cap on newly generated tokens.

    Yields:
        Progressively longer responses, with <think> sections rendered as
        collapsible HTML via ``parse_output``.
    """
    search_context = web_search(message) if search_enabled else ""

    # Rebuild the conversation in chat-template message format.
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        if assistant_msg:
            # Strip the injected <details> thought panel so the model never
            # sees its own UI scaffolding in the context window.
            clean_assistant = re.sub(r'<details.*?</details>', '', assistant_msg, flags=re.DOTALL).strip()
            conversation.append({"role": "assistant", "content": clean_assistant})

    conversation.append({"role": "user", "content": message + search_context})

    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        # Stop on either the generic EOS or Qwen's end-of-turn token.
        eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|im_end|>")]
    )

    # Run generation in the background; the streamer yields text as it lands.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    # Stop markers guard against the model "self-talking" as the user.
    stop_markers = ("User:", "<|im_start|>")
    buffer = ""
    for new_text in streamer:
        hit = next((m for m in stop_markers if m in new_text), None)
        if hit is not None:
            # FIX: keep the valid text that precedes the stop marker instead
            # of discarding the entire chunk it arrived in.
            buffer += new_text.split(hit, 1)[0]
            if buffer:
                yield parse_output(buffer)
            break
        buffer += new_text
        yield parse_output(buffer)
| |
|
| | |
# --- Gradio UI: chat column on the left, generation controls on the right ---
with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    gr.HTML("<h1>π§ Qwen3 Reasoning Lab</h1>")

    with gr.Row():
        with gr.Column(scale=4):
            # Tuple-format chatbot: history is a list of [user, assistant] pairs.
            chat_box = gr.Chatbot(height=600, label="Qwen3-0.6B")
            msg_input = gr.Textbox(placeholder="Ask a logic question...", show_label=False)

        with gr.Column(scale=1):
            search_toggle = gr.Checkbox(label="π Web Search (DDG)", value=False)
            temp_slider = gr.Slider(0.1, 1.0, 0.7, label="Temperature")
            token_slider = gr.Slider(512, 4096, 1024, label="Max Tokens")
            gr.Markdown("""
            ### Tips:
            - **Thinking:** This model is trained for Chain-of-Thought.
            - **Self-Talk Fix:** We use stop sequences to prevent the AI from acting as 'User'.
            """)
            clear_btn = gr.Button("π Clear Chat")

    # Two-stage submit: first append the user turn to the chatbot immediately
    # (unqueued, so the UI updates at once), then stream the model reply.
    # NOTE(review): the lambda returns ``x`` back into msg_input, so the
    # textbox is NOT cleared after submit — confirm whether that is intended.
    chat_event = msg_input.submit(
        lambda x, y: (x, y + [[x, None]]),
        [msg_input, chat_box],
        [msg_input, chat_box],
        queue=False
    ).then(
        chat,
        [msg_input, chat_box, search_toggle, temp_slider, token_slider],
        chat_box
    )

    # Reset the chatbot to an empty state.
    clear_btn.click(lambda: None, None, chat_box, queue=False)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside a container.
    demo.launch(server_name="0.0.0.0", server_port=7860)