NeoPy committed
Commit 32d5c15 · verified · 1 Parent(s): 25e5623

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +120 -96
  2. requirements.txt +6 -6
app.py CHANGED
@@ -1,12 +1,33 @@
 
  import gradio as gr
- from huggingface_hub import InferenceClient
- import os
- 
- # Initialize the client
- client = InferenceClient(
-     model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
-     token=os.getenv("HF_TOKEN")
- )
  # Default system prompts
  SYSTEM_PROMPTS = {
@@ -18,19 +39,27 @@ SYSTEM_PROMPTS = {
      "Custom": ""
  }

- def format_thinking(content):
-     """Format thinking tags for display"""
-     if "<think>" in content:
-         parts = content.split("<think>")
-         formatted = parts[0]
-         for part in parts[1:]:
-             if "</think>" in part:
-                 think_content, rest = part.split("</think>", 1)
-                 formatted += f"\n\n<details><summary>💭 Thinking Process</summary>\n\n{think_content.strip()}\n\n</details>\n\n{rest}"
-             else:
-                 formatted += part
-         return formatted
-     return content

- def chat(message, history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p, show_thinking):
      """Main chat function with streaming support"""

      # Determine system prompt
      if system_prompt_choice == "Custom":
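
For illustration, a minimal sketch of how the removed format_thinking helper folds a reasoning block into a collapsible section (hypothetical input string; output shown as comments):

    sample = "<think>3x + 7 = 22, so 3x = 15 and x = 5.</think>The answer is x = 5."
    print(format_thinking(sample))
    # <details><summary>💭 Thinking Process</summary>
    #
    # 3x + 7 = 22, so 3x = 15 and x = 5.
    #
    # </details>
    #
    # The answer is x = 5.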
@@ -38,54 +67,53 @@ def chat(message, history, system_prompt_choice, custom_system_prompt, temperatu
      else:
          system_content = SYSTEM_PROMPTS.get(system_prompt_choice, SYSTEM_PROMPTS["Default Assistant"])

-     # Build messages
-     messages = [{"role": "system", "content": system_content}]
- 
-     # Add history
      for msg in history:
-         if msg["role"] == "user":
-             messages.append({"role": "user", "content": msg["content"]})
-         elif msg["role"] == "assistant":
-             # Clean up thinking tags from history
-             content = msg["content"]
-             if "<details>" in content:
-                 # Remove the formatted thinking for API calls
-                 import re
-                 content = re.sub(r'<details>.*?</details>', '', content, flags=re.DOTALL)
-             messages.append({"role": "assistant", "content": content.strip()})

-     # Add current message
      messages.append({"role": "user", "content": message})

      try:
          response = ""
-         stream = client.chat_completion(
-             messages=messages,
-             max_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             stream=True
-         )

-         for chunk in stream:
-             if chunk.choices[0].delta.content:
-                 response += chunk.choices[0].delta.content
-                 # Format thinking if enabled
-                 if show_thinking:
-                     yield format_thinking(response)
-                 else:
-                     # Hide thinking content
-                     display_response = response
-                     if "</think>" in display_response:
-                         import re
-                         display_response = re.sub(r'<think>.*?</think>', '', display_response, flags=re.DOTALL)
-                     else:
-                         # Still thinking, show placeholder
-                         display_response = "🤔 *Thinking...*"
-                     yield display_response.strip()
- 
      except Exception as e:
-         yield f"❌ Error: {str(e)}\n\nPlease check your HF_TOKEN and try again."

  def clear_chat():
      """Clear the chat history"""
@@ -129,44 +157,46 @@ css = """
  .header-container a:hover {
      text-decoration: underline;
  }
- .parameter-box {
      background: var(--background-fill-secondary);
-     padding: 15px;
      border-radius: 8px;
-     margin-top: 10px;
  }
  .chatbot-container {
      min-height: 500px;
  }
- footer {
-     text-align: center;
-     margin-top: 20px;
-     padding: 10px;
-     color: var(--body-text-color-subdued);
- }
  """

  # Build the interface
  with gr.Blocks(
-     title="DeepSeek R1 Chatbot",
      theme=gr.themes.Soft(),
      css=css,
      fill_height=True,
      footer_links=[
          {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
-         {"label": "Model", "url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"}
      ]
  ) as demo:

      # Header
      gr.HTML("""
      <div class="header-container">
-         <h1>🧠 DeepSeek R1 Chatbot</h1>
-         <p>Powered by DeepSeek-R1-0528-Qwen3-8B with reasoning capabilities</p>
          <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
      </div>
      """)

      with gr.Row():
          # Main chat column
          with gr.Column(scale=3):
@@ -175,7 +205,6 @@ with gr.Blocks(
              height=500,
              type="messages",
              show_copy_button=True,
-             avatar_images=(None, "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg"),
              render_markdown=True,
              elem_classes=["chatbot-container"]
          )
@@ -226,12 +255,12 @@ with gr.Blocks(
          )

          max_tokens = gr.Slider(
-             minimum=64,
-             maximum=4096,
-             value=1024,
-             step=64,
              label="Max Tokens",
-             info="Maximum response length"
          )

          top_p = gr.Slider(
@@ -243,13 +272,6 @@ with gr.Blocks(
              info="Nucleus sampling parameter"
          )

-         with gr.Accordion("Display Options", open=False):
-             show_thinking = gr.Checkbox(
-                 value=True,
-                 label="Show Thinking Process",
-                 info="Display the model's reasoning steps"
-             )
- 
          # Export output
          export_output = gr.Textbox(
              label="Exported Chat",
@@ -262,11 +284,11 @@ with gr.Blocks(
          gr.Markdown("### 💡 Example Prompts")
          gr.Examples(
              examples=[
-                 ["Explain quantum computing in simple terms"],
-                 ["Write a haiku about artificial intelligence"],
-                 ["What's the time complexity of quicksort and why?"],
-                 ["Help me brainstorm ideas for a sustainable business"],
-                 ["Solve this step by step: If 3x + 7 = 22, what is x?"],
              ],
              inputs=msg,
              label=""
@@ -287,7 +309,7 @@ with gr.Blocks(
          history.append({"role": "user", "content": message})
          return "", history

-     def bot_response(history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p, show_thinking):
          if not history:
              yield history
              return
@@ -297,11 +319,11 @@ with gr.Blocks(

          history.append({"role": "assistant", "content": ""})

-         for response in chat(user_msg, history_for_api, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p, show_thinking):
              history[-1]["content"] = response
              yield history

-     def regenerate(history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p, show_thinking):
          if len(history) >= 2:
              # Remove last assistant message
              history = history[:-1]
@@ -311,7 +333,7 @@ with gr.Blocks(

          history.append({"role": "assistant", "content": ""})

-         for response in chat(user_msg, history_for_api, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p, show_thinking):
              history[-1]["content"] = response
              yield history
          else:
@@ -329,7 +351,7 @@ with gr.Blocks(
          queue=False
      ).then(
          bot_response,
-         inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p, show_thinking],
          outputs=[chatbot]
      )

@@ -340,7 +362,7 @@ with gr.Blocks(
          queue=False
      ).then(
          bot_response,
-         inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p, show_thinking],
          outputs=[chatbot]
      )

@@ -351,7 +373,7 @@ with gr.Blocks(

      regenerate_btn.click(
          regenerate,
-         inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p, show_thinking],
          outputs=[chatbot]
      )

@@ -362,4 +384,6 @@ with gr.Blocks(
      )

  if __name__ == "__main__":
      demo.launch()
 
+ =%= app.py =%=
  import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ import torch
+ from threading import Thread
+ import re

+ # Model configuration - using a smaller model that works well on CPU
+ MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ 
+ # Global variables for model and tokenizer
+ model = None
+ tokenizer = None
+ 
+ def load_model():
+     """Load the model and tokenizer"""
+     global model, tokenizer
+ 
+     if model is None:
+         print("Loading model... This may take a moment on CPU.")
+         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+         model = AutoModelForCausalLM.from_pretrained(
+             MODEL_NAME,
+             torch_dtype=torch.float32,
+             device_map="cpu",
+             low_cpu_mem_usage=True
+         )
+         print("Model loaded successfully!")
+ 
+     return model, tokenizer

  # Default system prompts
  SYSTEM_PROMPTS = {
 
      "Custom": ""
  }

+ def format_chat_prompt(messages, system_prompt):
+     """Format messages for TinyLlama chat format"""
+     formatted = f"<|system|>\n{system_prompt}</s>\n"
+ 
+     for msg in messages:
+         if msg["role"] == "user":
+             formatted += f"<|user|>\n{msg['content']}</s>\n"
+         elif msg["role"] == "assistant":
+             formatted += f"<|assistant|>\n{msg['content']}</s>\n"
+ 
+     formatted += "<|assistant|>\n"
+     return formatted
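
For a concrete picture, the new helper renders a short hypothetical history into TinyLlama's Zephyr-style tag layout:

    msgs = [
        {"role": "user", "content": "Hi!"},
        {"role": "assistant", "content": "Hello! How can I help?"},
        {"role": "user", "content": "Tell me a joke."},
    ]
    print(format_chat_prompt(msgs, "You are a helpful assistant."))
    # <|system|>
    # You are a helpful assistant.</s>
    # <|user|>
    # Hi!</s>
    # <|assistant|>
    # Hello! How can I help?</s>
    # <|user|>
    # Tell me a joke.</s>
    # <|assistant|>

tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True) should derive essentially the same layout from the checkpoint's bundled chat template, which is the sturdier choice if the model is ever swapped.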
 
+ def chat(message, history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
      """Main chat function with streaming support"""
+     global model, tokenizer
+ 
+     # Load model if not loaded
+     if model is None:
+         yield "⏳ Loading model for the first time... Please wait (this may take 1-2 minutes on CPU)..."
+         load_model()

      # Determine system prompt
      if system_prompt_choice == "Custom":

      else:
          system_content = SYSTEM_PROMPTS.get(system_prompt_choice, SYSTEM_PROMPTS["Default Assistant"])

+     # Build messages list
+     messages = []

      for msg in history:
+         if msg["role"] in ["user", "assistant"]:
+             messages.append({"role": msg["role"], "content": msg["content"]})

      messages.append({"role": "user", "content": message})

      try:
+         # Format the prompt
+         prompt = format_chat_prompt(messages, system_content)
+ 
+         # Tokenize
+         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
+ 
+         # Set up streamer
+         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+ 
+         # Generation parameters
+         generation_kwargs = {
+             "input_ids": inputs["input_ids"],
+             "attention_mask": inputs["attention_mask"],
+             "max_new_tokens": max_tokens,
+             "temperature": temperature if temperature > 0 else 0.1,
+             "top_p": top_p,
+             "do_sample": temperature > 0,
+             "streamer": streamer,
+             "pad_token_id": tokenizer.eos_token_id,
+             "eos_token_id": tokenizer.eos_token_id,
+         }
+ 
+         # Run generation in a separate thread
+         thread = Thread(target=model.generate, kwargs=generation_kwargs)
+         thread.start()
+ 
+         # Stream the response
          response = ""
+         for new_text in streamer:
+             response += new_text
+             # Clean up any remaining special tokens
+             clean_response = response.replace("</s>", "").strip()
+             yield clean_response
+ 
+         thread.join()

      except Exception as e:
+         yield f"❌ Error: {str(e)}\n\nPlease try again with a shorter message or lower max tokens."
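
Distilled from the chat function above, the thread-plus-streamer pattern in isolation (a runnable sketch reusing the same TinyLlama checkpoint):

    from threading import Thread
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    tok = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    lm = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    prompt = "<|user|>\nTell me a fun fact about space.</s>\n<|assistant|>\n"
    inputs = tok(prompt, return_tensors="pt")

    # generate() blocks until completion, so it runs in a worker thread
    # while the main thread drains the streamer queue piece by piece
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(target=lm.generate, kwargs={**inputs, "max_new_tokens": 64, "streamer": streamer})
    thread.start()
    for piece in streamer:
        print(piece, end="", flush=True)
    thread.join()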
 
  def clear_chat():
      """Clear the chat history"""
 
  .header-container a:hover {
      text-decoration: underline;
  }
+ .info-box {
      background: var(--background-fill-secondary);
+     padding: 10px 15px;
      border-radius: 8px;
+     margin: 10px 0;
+     border-left: 4px solid #667eea;
  }
  .chatbot-container {
      min-height: 500px;
  }
  """

  # Build the interface
  with gr.Blocks(
+     title="TinyLlama Chatbot (CPU)",
      theme=gr.themes.Soft(),
      css=css,
      fill_height=True,
      footer_links=[
          {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
+         {"label": "Model", "url": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0"}
      ]
  ) as demo:

      # Header
      gr.HTML("""
      <div class="header-container">
+         <h1>🦙 TinyLlama Chatbot</h1>
+         <p>Powered by TinyLlama-1.1B-Chat - Running locally on CPU</p>
          <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
      </div>
      """)

+     gr.HTML("""
+     <div class="info-box">
+         ℹ️ <strong>CPU Mode:</strong> This chatbot runs entirely on CPU without any API calls.
+         First response may take longer as the model loads. Responses are generated locally.
+     </div>
+     """)
+ 
      with gr.Row():
          # Main chat column
          with gr.Column(scale=3):

              height=500,
              type="messages",
              show_copy_button=True,
              render_markdown=True,
              elem_classes=["chatbot-container"]
          )
 
          )

          max_tokens = gr.Slider(
+             minimum=32,
+             maximum=512,
+             value=256,
+             step=32,
              label="Max Tokens",
+             info="Maximum response length (lower = faster on CPU)"
          )

          top_p = gr.Slider(

              info="Nucleus sampling parameter"
          )

          # Export output
          export_output = gr.Textbox(
              label="Exported Chat",

          gr.Markdown("### 💡 Example Prompts")
          gr.Examples(
              examples=[
+                 ["Explain what machine learning is in simple terms"],
+                 ["Write a short poem about the ocean"],
+                 ["What are three tips for staying productive?"],
+                 ["Tell me a fun fact about space"],
+                 ["How do I make a simple pasta dish?"],
              ],
              inputs=msg,
              label=""
 
          history.append({"role": "user", "content": message})
          return "", history

+     def bot_response(history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
          if not history:
              yield history
              return

          history.append({"role": "assistant", "content": ""})

+         for response in chat(user_msg, history_for_api, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
              history[-1]["content"] = response
              yield history

+     def regenerate(history, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
          if len(history) >= 2:
              # Remove last assistant message
              history = history[:-1]

          history.append({"role": "assistant", "content": ""})

+         for response in chat(user_msg, history_for_api, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p):
              history[-1]["content"] = response
              yield history
          else:
 
          queue=False
      ).then(
          bot_response,
+         inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p],
          outputs=[chatbot]
      )

          queue=False
      ).then(
          bot_response,
+         inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p],
          outputs=[chatbot]
      )

      regenerate_btn.click(
          regenerate,
+         inputs=[chatbot, system_prompt_choice, custom_system_prompt, temperature, max_tokens, top_p],
          outputs=[chatbot]
      )

      )

  if __name__ == "__main__":
+     # Pre-load model on startup (optional - can be commented out for faster startup)
+     print("Starting TinyLlama Chatbot...")
      demo.launch()
requirements.txt CHANGED
@@ -1,12 +1,12 @@
- huggingface_hub
  gradio
- requests
- Pillow
  git+https://github.com/huggingface/transformers
  torch
- tokenizers
  accelerate
  numpy
- pandas
  sentencepiece
- datasets

  gradio
  git+https://github.com/huggingface/transformers
  torch
+ requests
+ Pillow
  accelerate
+ tokenizers
+ datasets
+ torchvision
+ torchaudio
  numpy
  sentencepiece