"""Gradio chat UI backed by a local GGUF Llama model (llama-cpp-python).

On start-up the script downloads the quantized model from the HuggingFace
Hub, loads it on the CPU and then serves a single-page chat interface.
"""

import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download a pre-made GGUF model from HuggingFace
MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF"
MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf"

# Sampling settings used for every reply.
MAX_TOKENS = 512
TEMPERATURE = 0.7
TOP_P = 0.9

print("📥 Downloading model from HuggingFace...")
model_path = hf_hub_download(
    repo_id=MODEL_NAME,
    filename=MODEL_FILE,
    local_dir="./models",
)
print(f"✅ Model downloaded to: {model_path}")

print("🚀 Loading model...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    n_gpu_layers=0,
    verbose=False,
)
print("✅ Model loaded!")


def _build_messages(message, history):
    """Convert Gradio ``[user, bot]`` pairs plus the new message to chat messages."""
    messages = []
    for user_msg, bot_msg in history:
        # A turn can be half-filled (e.g. a reply that never arrived); skip
        # the missing side instead of sending ``None`` to the model.
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return messages


def chat(message, history):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The new user message.
        history: Previous turns as ``[user_msg, bot_msg]`` pairs (may be
            empty or ``None``).

    Returns:
        The reply text with surrounding whitespace removed.

    Raises:
        ValueError: If the request is rejected even with no history left
            to drop (e.g. the message alone does not fit in the context).
    """
    turns = list(history or [])
    while True:
        try:
            # create_chat_completion applies the chat template that belongs
            # to the loaded model, so the prompt markers always match the
            # model (the previous hand-built prompt used Llama 3 markers
            # with a Llama 2 model).
            response = llm.create_chat_completion(
                messages=_build_messages(message, turns),
                max_tokens=MAX_TOKENS,
                temperature=TEMPERATURE,
                top_p=TOP_P,
            )
        except ValueError:
            # llama-cpp-python rejects prompts that exceed the context
            # window with ValueError; forget the oldest turn and retry so
            # long conversations keep working.
            if not turns:
                raise
            turns = turns[1:]
        else:
            text = response["choices"][0]["message"]["content"] or ""
            return text.strip()


def add_user_message(message, history):
    """Show the user's message immediately and clear the input box.

    Returns:
        A ``(textbox_value, chat_history)`` pair. The new turn carries
        ``None`` as its reply until ``generate_reply`` fills it in. Blank
        input leaves the history untouched.
    """
    turns = list(history or [])
    if not message or not message.strip():
        return "", turns
    return "", turns + [[message, None]]


def generate_reply(history):
    """Fill in the reply for the last, still unanswered, turn of ``history``.

    Returns the history unchanged when there is nothing waiting for a reply.
    """
    if not history or history[-1][1] is not None:
        return history
    *previous, (message, _pending) = history
    return previous + [[message, chat(message, previous)]]


# Ultra-modern CSS
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;600;700&display=swap');

* {
    font-family: 'Space Grotesk', sans-serif !important;
}

.gradio-container {
    background: linear-gradient(135deg, #1e3a8a 0%, #7c3aed 50%, #db2777 100%) !important;
}

#chatbot {
    height: 650px !important;
    border-radius: 24px !important;
    border: 2px solid rgba(255,255,255,0.1) !important;
    box-shadow: 0 25px 50px -12px rgba(0,0,0,0.5) !important;
}

.message {
    padding: 18px 24px !important;
    border-radius: 20px !important;
    font-size: 15px !important;
    margin: 8px 0 !important;
    backdrop-filter: blur(10px) !important;
    box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37) !important;
}

.user {
    background: linear-gradient(135deg, rgba(147, 51, 234, 0.9) 0%, rgba(219, 39, 119, 0.9) 100%) !important;
    color: white !important;
    border: 1px solid rgba(255,255,255,0.2) !important;
}

.bot {
    background: linear-gradient(135deg, rgba(59, 130, 246, 0.9) 0%, rgba(147, 51, 234, 0.9) 100%) !important;
    color: white !important;
    border: 1px solid rgba(255,255,255,0.2) !important;
}

button {
    border-radius: 16px !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
}

button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 12px 24px rgba(0,0,0,0.3) !important;
}

.primary {
    background: linear-gradient(135deg, #9333ea 0%, #db2777 100%) !important;
    border: none !important;
}

input, textarea {
    border-radius: 16px !important;
    border: 2px solid rgba(255,255,255,0.2) !important;
    background: rgba(255,255,255,0.1) !important;
    backdrop-filter: blur(10px) !important;
    color: white !important;
}

input::placeholder, textarea::placeholder {
    color: rgba(255,255,255,0.6) !important;
}

.prose {
    color: white !important;
}

.prose h1 {
    background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700 !important;
}

footer {
    display: none !important;
}
"""

with gr.Blocks(
    theme=gr.themes.Glass(
        primary_hue="purple",
        secondary_hue="pink",
    ),
    css=custom_css,
    title="🦙 Llama 3.2 AI",
) as demo:
    # Explicit newlines keep the headings valid Markdown regardless of how
    # this source is indented.
    gr.Markdown(
        "# 🦙 Llama Chat AI Assistant (TEST)\n"
        "### ⚡ Testing deployment with pre-trained model"
    )

    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(
            "https://em-content.zobj.net/thumbs/120/apple/354/sparkles_2728.png",
            "https://em-content.zobj.net/thumbs/120/apple/354/llama_1f999.png",
        ),
        height=650,
        show_copy_button=True,
        likeable=True,
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="✨ Ask me anything...",
            show_label=False,
            scale=8,
            container=False,
        )
        submit = gr.Button("Send 🚀", scale=1, variant="primary", size="lg")

    gr.Examples(
        examples=[
            "🌍 What is the capital of France?",
            "🧠 Explain quantum computing simply",
            "💻 Write fibonacci in Python",
            "😴 Tips for better sleep?",
            "🔢 Continue: 2, 4, 6, 8...",
            "📝 Write a haiku about AI",
        ],
        inputs=msg,
        label="💡 Quick Start:",
    )

    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown(
            "**Testing Model:**\n"
            "- 🤖 Model: Llama 2 7B Chat (Pre-trained)\n"
            "- ⚙️ Format: GGUF (q4_k_m quantization)\n"
            "- 📝 Note: This is a test deployment. "
            "Will be replaced with fine-tuned model."
        )

    gr.ClearButton([msg, chatbot], value="🗑️ New Chat")

    # Two chained steps per trigger: first record the message and clear the
    # textbox (instant), then run the slow model call. The Chatbot always
    # receives a list of [user, bot] pairs, which is the value it expects.
    submit.click(
        add_user_message, [msg, chatbot], [msg, chatbot], queue=False
    ).then(generate_reply, chatbot, chatbot)
    msg.submit(
        add_user_message, [msg, chatbot], [msg, chatbot], queue=False
    ).then(generate_reply, chatbot, chatbot)

if __name__ == "__main__":
    demo.queue()
    demo.launch(share=False)