DevNumb committed on
Commit
3d70cf4
·
verified ·
1 Parent(s): 9a14bb0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -89
app.py CHANGED
@@ -1,83 +1,101 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import torch
4
  import time
5
 
6
- # Initialize the pipeline
7
  @torch.no_grad()
8
  def load_model():
9
  print("Loading Qwen3-0.6B model...")
10
- pipe = pipeline(
11
- "text-generation",
12
- model="Qwen/Qwen3-0.6B",
 
 
 
 
 
 
13
  torch_dtype=torch.float16,
14
  device_map="auto",
15
  trust_remote_code=True
16
  )
 
17
  print("Model loaded successfully!")
18
- return pipe
19
 
20
  # Load the model
21
- pipe = load_model()
 
 
 
 
 
 
22
 
23
- def format_chat_template(messages):
24
  """
25
- Format messages using the model's chat template
26
  """
27
- try:
28
- # Use the model's built-in chat template
29
- formatted_prompt = pipe.tokenizer.apply_chat_template(
30
- messages,
31
- tokenize=False,
32
- add_generation_prompt=True
33
- )
34
- return formatted_prompt
35
- except Exception as e:
36
- # Fallback formatting
37
- conversation = ""
38
- for msg in messages:
39
- if msg["role"] == "user":
40
- conversation += f"User: {msg['content']}\n\nAssistant:"
41
- elif msg["role"] == "assistant":
42
- conversation += f" {msg['content']}\n\n"
43
- return conversation
44
 
45
  def generate_response(message, history, temperature=0.7, max_length=512):
46
  """
47
  Generate a response using Qwen3-0.6B
48
  """
 
 
 
49
  try:
50
- # Convert Gradio history to messages format
51
- messages = []
52
- for human_msg, assistant_msg in history:
53
- messages.extend([
54
- {"role": "user", "content": human_msg},
55
- {"role": "assistant", "content": assistant_msg}
56
- ])
 
 
57
 
58
- # Add current message
59
- messages.append({"role": "user", "content": message})
60
 
61
  # Generate response
62
- formatted_prompt = format_chat_template(messages)
 
 
 
 
 
 
 
 
 
 
63
 
64
- outputs = pipe(
65
- formatted_prompt,
66
- max_new_tokens=max_length,
67
- temperature=temperature,
68
- do_sample=True,
69
- top_p=0.9,
70
- repetition_penalty=1.1,
71
- pad_token_id=pipe.tokenizer.eos_token_id,
72
- eos_token_id=pipe.tokenizer.eos_token_id,
73
- return_full_text=False # Only return the generated part
74
- )
75
 
76
- response = outputs[0]['generated_text'].strip()
77
 
78
  # Clean up response
79
- if "User:" in response:
80
- response = response.split("User:")[0].strip()
 
81
 
82
  return response
83
 
@@ -215,6 +233,11 @@ custom_css = """
215
  text-fill-color: transparent;
216
  font-weight: 700 !important;
217
  }
 
 
 
 
 
218
  """
219
 
220
  # Create the Gradio interface
@@ -227,6 +250,18 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
227
  elem_classes="markdown-container"
228
  )
229
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  with gr.Row(equal_height=False):
231
  with gr.Column(scale=3):
232
  chatbot = gr.Chatbot(
@@ -241,13 +276,18 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
241
  with gr.Row():
242
  msg = gr.Textbox(
243
  label="💭 Your message",
244
- placeholder="Ask me anything...",
245
  lines=2,
246
  scale=4,
247
- container=False
 
248
  )
249
  with gr.Column(scale=1):
250
- submit_btn = gr.Button("Send 🚀", size="lg")
 
 
 
 
251
 
252
  with gr.Row():
253
  clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
@@ -292,43 +332,44 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
292
  - Adjust temperature for creativity
293
  """)
294
 
295
- # Event handlers
296
- submit_event = msg.submit(
297
- chat_interface,
298
- inputs=[msg, chatbot, temperature, max_length],
299
- outputs=[msg, chatbot]
300
- )
301
-
302
- submit_btn.click(
303
- chat_interface,
304
- inputs=[msg, chatbot, temperature, max_length],
305
- outputs=[msg, chatbot]
306
- )
307
-
308
- clear_btn.click(
309
- clear_chat,
310
- outputs=[chatbot]
311
- )
312
-
313
- retry_btn.click(
314
- retry_last_response,
315
- inputs=[chatbot, temperature, max_length],
316
- outputs=[chatbot]
317
- )
318
-
319
- # Additional examples
320
- with gr.Accordion("💡 Example Prompts", open=False):
321
- gr.Examples(
322
- examples=[
323
- "Explain quantum computing in simple terms",
324
- "Write a short poem about artificial intelligence",
325
- "What are the benefits of renewable energy?",
326
- "How do I learn programming effectively?",
327
- "Tell me an interesting fact about space"
328
- ],
329
- inputs=msg,
330
- label="Click any example to try it out!"
331
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
333
  if __name__ == "__main__":
334
  demo.launch(
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
  import time
5
 
6
# Initialize the model and tokenizer
@torch.no_grad()
def load_model():
    """Download and initialize the Qwen3-0.6B tokenizer and model.

    Returns:
        tuple: ``(tokenizer, model)`` ready for text generation.
    """
    print("Loading Qwen3-0.6B model...")

    repo_id = "Qwen/Qwen3-0.6B"

    # Tokenizer first, then the weights; both opt in to the repo's custom code.
    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=torch.float16,  # half precision keeps the footprint small
        device_map="auto",          # let accelerate choose CPU/GPU placement
        trust_remote_code=True,
    )

    print("Model loaded successfully!")
    return tokenizer, model
26
 
27
# Load the model once at import time. On any failure, park the pair as
# (None, None) so the UI can detect the problem and degrade gracefully
# instead of crashing the whole app.
try:
    tokenizer, model = load_model()
    print(f"Model device: {model.device}")
    print(f"Model dtype: {model.dtype}")
except Exception as e:
    print(f"Error loading model: {e}")
    tokenizer, model = None, None
35
 
36
def format_messages(history, new_message):
    """Convert Gradio chat history plus the latest prompt into a message list.

    Args:
        history: iterable of ``(user_text, assistant_text)`` pairs from the
            Gradio Chatbot component.
        new_message: the user's newest prompt.

    Returns:
        list[dict]: ``{"role", "content"}`` dicts, ending with the new
        user turn, ready for ``tokenizer.apply_chat_template``.
    """
    # Flatten each (user, assistant) pair into two role-tagged entries.
    past_turns = [
        {"role": role, "content": text}
        for user_text, assistant_text in history
        for role, text in (("user", user_text), ("assistant", assistant_text))
    ]

    # The current prompt always closes the list as an open user turn.
    return past_turns + [{"role": "user", "content": new_message}]
 
 
 
 
53
 
54
  def generate_response(message, history, temperature=0.7, max_length=512):
55
  """
56
  Generate a response using Qwen3-0.6B
57
  """
58
+ if tokenizer is None or model is None:
59
+ return "Model is not loaded properly. Please check the logs."
60
+
61
  try:
62
+ # Format messages
63
+ messages = format_messages(history, message)
64
+
65
+ # Apply chat template
66
+ text = tokenizer.apply_chat_template(
67
+ messages,
68
+ tokenize=False,
69
+ add_generation_prompt=True
70
+ )
71
 
72
+ # Tokenize
73
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
74
 
75
  # Generate response
76
+ with torch.no_grad():
77
+ generated_ids = model.generate(
78
+ **model_inputs,
79
+ max_new_tokens=max_length,
80
+ temperature=temperature,
81
+ do_sample=True,
82
+ top_p=0.9,
83
+ repetition_penalty=1.1,
84
+ eos_token_id=tokenizer.eos_token_id,
85
+ pad_token_id=tokenizer.eos_token_id
86
+ )
87
 
88
+ # Decode response
89
+ generated_ids = [
90
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
91
+ ]
 
 
 
 
 
 
 
92
 
93
+ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
94
 
95
  # Clean up response
96
+ response = response.strip()
97
+ if "<|im_end|>" in response:
98
+ response = response.split("<|im_end|>")[0].strip()
99
 
100
  return response
101
 
 
233
  text-fill-color: transparent;
234
  font-weight: 700 !important;
235
  }
236
+
237
+ .loading {
238
+ opacity: 0.7;
239
+ pointer-events: none;
240
+ }
241
  """
242
 
243
  # Create the Gradio interface
 
250
  elem_classes="markdown-container"
251
  )
252
 
253
+ # Show loading status
254
+ if tokenizer is None or model is None:
255
+ gr.Markdown("""
256
+ ## ⚠️ Model Loading Issue
257
+ The model is taking longer than expected to load. This might be due to:
258
+ - Large model size download
259
+ - Hugging Face API limitations
260
+ - Insufficient resources
261
+
262
+ Please wait a few minutes and refresh the page.
263
+ """)
264
+
265
  with gr.Row(equal_height=False):
266
  with gr.Column(scale=3):
267
  chatbot = gr.Chatbot(
 
276
  with gr.Row():
277
  msg = gr.Textbox(
278
  label="💭 Your message",
279
+ placeholder="Ask me anything..." if tokenizer and model else "Model is loading...",
280
  lines=2,
281
  scale=4,
282
+ container=False,
283
+ interactive=tokenizer is not None and model is not None
284
  )
285
  with gr.Column(scale=1):
286
+ submit_btn = gr.Button(
287
+ "Send 🚀" if tokenizer and model else "Loading...",
288
+ size="lg",
289
+ interactive=tokenizer is not None and model is not None
290
+ )
291
 
292
  with gr.Row():
293
  clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
 
332
  - Adjust temperature for creativity
333
  """)
334
 
335
+ # Event handlers (only if model is loaded)
336
+ if tokenizer is not None and model is not None:
337
+ submit_event = msg.submit(
338
+ chat_interface,
339
+ inputs=[msg, chatbot, temperature, max_length],
340
+ outputs=[msg, chatbot]
341
+ )
342
+
343
+ submit_btn.click(
344
+ chat_interface,
345
+ inputs=[msg, chatbot, temperature, max_length],
346
+ outputs=[msg, chatbot]
347
+ )
348
+
349
+ clear_btn.click(
350
+ clear_chat,
351
+ outputs=[chatbot]
352
+ )
353
+
354
+ retry_btn.click(
355
+ retry_last_response,
356
+ inputs=[chatbot, temperature, max_length],
357
+ outputs=[chatbot]
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  )
359
+
360
+ # Additional examples
361
+ with gr.Accordion("💡 Example Prompts", open=False):
362
+ gr.Examples(
363
+ examples=[
364
+ "Explain quantum computing in simple terms",
365
+ "Write a short poem about artificial intelligence",
366
+ "What are the benefits of renewable energy?",
367
+ "How do I learn programming effectively?",
368
+ "Tell me an interesting fact about space"
369
+ ],
370
+ inputs=msg,
371
+ label="Click any example to try it out!"
372
+ )
373
 
374
  if __name__ == "__main__":
375
  demo.launch(