Spaces:
Build error
Create app.py
app.py ADDED
@@ -0,0 +1,254 @@
import gradio as gr
from llama_cpp import Llama
import time
import os

# Configuration
MODEL_REPO = "kainatq/quantum-keek-7b-Q4_K_M-GGUF"
MODEL_FILE = "quantum-keek-7b-q4_k_m.gguf"
MODEL_PATH = f"./{MODEL_FILE}"

# Initialize the model
def load_model():
    try:
        # Download model if not exists
        if not os.path.exists(MODEL_PATH):
            print("Downloading model... This may take a while.")
            from huggingface_hub import hf_hub_download
            hf_hub_download(
                repo_id=MODEL_REPO,
                filename=MODEL_FILE,
                local_dir=".",
                local_dir_use_symlinks=False
            )

        # Initialize Llama with CPU optimization
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=4096,        # Context window
            n_threads=2,       # Use both vCPUs
            n_batch=512,
            use_mlock=False,   # Don't lock memory (limited RAM)
            use_mmap=True,     # Use memory mapping
            verbose=False
        )
        print("Model loaded successfully!")
        return llm
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

# Load the model
llm = load_model()

def chat_with_ai(message, history, system_prompt, temperature, max_tokens):
    """
    Handle one chat turn with the AI model and return the updated history.
    The Chatbot component this feeds expects a list of (user, assistant) pairs,
    not a bare string.
    """
    history = history or []

    if llm is None:
        history.append((message, "Error: Model not loaded. Please check the console for details."))
        return history

    # Prepare conversation history
    conversation = []

    # Add system prompt
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})

    # Add history
    for human, assistant in history:
        conversation.extend([
            {"role": "user", "content": human},
            {"role": "assistant", "content": assistant}
        ])

    # Add current message
    conversation.append({"role": "user", "content": message})

    try:
        # Create prompt from conversation
        prompt = ""
        for msg in conversation:
            if msg["role"] == "system":
                prompt += f"System: {msg['content']}\n\n"
            elif msg["role"] == "user":
                prompt += f"User: {msg['content']}\n\n"
            elif msg["role"] == "assistant":
                prompt += f"Assistant: {msg['content']}\n\n"

        prompt += "Assistant:"

        # Generate response
        start_time = time.time()

        response = llm(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            stop=["User:", "System:"],
            echo=False,
            stream=False
        )

        generation_time = time.time() - start_time
        answer = response['choices'][0]['text'].strip()

        # Add generation info
        tokens_used = response['usage']['total_tokens']
        answer += f"\n\n---\n*Generated in {generation_time:.2f}s using {tokens_used} tokens*"

        history.append((message, answer))
        return history

    except Exception as e:
        history.append((message, f"Error generating response: {str(e)}"))
        return history

def clear_chat():
    """Clear the chat history"""
    return [], ""

# Custom CSS for ChatGPT-like styling
custom_css = """
#chatbot {
    min-height: 400px;
    border: 1px solid #e0e0e0;
    border-radius: 10px;
    padding: 20px;
    background: #f9f9f9;
}
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
}
.dark #chatbot {
    background: #1e1e1e;
    border-color: #444;
}
"""

# Create the Gradio interface
with gr.Blocks(
    title="🪐 Quantum Keek Chat",
    theme=gr.themes.Soft(),
    css=custom_css
) as demo:

    gr.Markdown(
        """
        # 🪐 Quantum Keek Chat
        *Powered by Quantum Keek 7B GGUF - Running on CPU with llama.cpp*
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Configuration")

            system_prompt = gr.Textbox(
                label="System Prompt",
                value="You are Quantum Keek, a helpful AI assistant. Provide detailed, thoughtful responses to user queries.",
                lines=3,
                placeholder="Enter system instructions..."
            )

            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Higher values = more creative, Lower values = more focused"
            )

            max_tokens = gr.Slider(
                minimum=100,
                maximum=2048,
                value=512,
                step=50,
                label="Max Tokens",
                info="Maximum length of response"
            )

            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

            gr.Markdown(
                """
                ---
                **Model Info:**
                - **Model:** Quantum Keek 7B Q4_K_M
                - **Platform:** CPU (llama.cpp)
                - **Context:** 4096 tokens
                """
            )

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="🪐 Quantum Keek",
                elem_id="chatbot",
                height=500,
                show_copy_button=True
            )

            msg = gr.Textbox(
                label="Your message",
                placeholder="Type your message here...",
                lines=2,
                max_lines=5
            )

            with gr.Row():
                submit_btn = gr.Button("🚀 Send", variant="primary")
                stop_btn = gr.Button("⏹️ Stop", variant="secondary")

    # Event handlers
    submit_event = msg.submit(
        fn=chat_with_ai,
        inputs=[msg, chatbot, system_prompt, temperature, max_tokens],
        outputs=[chatbot]
    ).then(
        lambda: "",  # Clear input
        outputs=[msg]
    )

    submit_btn.click(
        fn=chat_with_ai,
        inputs=[msg, chatbot, system_prompt, temperature, max_tokens],
        outputs=[chatbot]
    ).then(
        lambda: "",  # Clear input
        outputs=[msg]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, msg]
    )

    # Stop button functionality
    def stop_generation():
        # This is a placeholder - in a real implementation you'd need to handle streaming
        return "Generation stopped by user."

    stop_btn.click(
        fn=stop_generation,
        outputs=[msg]
    )

    gr.Markdown(
        """
        ---
        **Note:** This is running on Hugging Face Spaces free tier (2 vCPU, 16 GB RAM).
        Responses may take a few seconds to generate.
        """
    )

if __name__ == "__main__":
    # Set huggingface token if needed (for gated models)
    # os.environ["HUGGINGFACE_HUB_TOKEN"] = "your_token_here"

    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
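A note on the "Build error" status above: app.py imports gradio, llama_cpp, and huggingface_hub, so one likely cause is a missing or incomplete requirements.txt in the Space. A minimal sketch of such a file, assuming none exists yet in this repo (the package names are the standard PyPI ones; this file is not part of the commit shown):

# requirements.txt (hypothetical companion file, not part of this commit)
gradio
llama-cpp-python
huggingface_hub

llama-cpp-python typically compiles llama.cpp from source during installation, so the first Space build can take several minutes even when it succeeds.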