# Hugging Face Space application: chat UI for official Meta Llama models (Gradio).
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
import os

# Available official Llama models (require access approval from Meta):
OFFICIAL_LLAMA_MODELS = {
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B-Instruct",
    "Llama-3.2-3B": "meta-llama/Llama-3.2-3B-Instruct",
    "Llama-3.1-8B": "meta-llama/Llama-3.1-8B-Instruct",
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Llama-3.1-405B": "meta-llama/Llama-3.1-405B-Instruct",  # Requires massive GPU resources
}

# Select your model (start with smaller ones for testing).
# BUG FIX: the original indexed "Llama-3.2-8B", which is not a key of
# OFFICIAL_LLAMA_MODELS and raised KeyError at import time. The 8B
# Instruct model lives under the "Llama-3.1-8B" key.
MODEL_ID = OFFICIAL_LLAMA_MODELS["Llama-3.1-8B"]

print(f"Loading official Llama model: {MODEL_ID}")
print("Note: This requires approval from Meta. Request access at:")
print(f"https://huggingface.co/{MODEL_ID}")

# Check for Hugging Face token (required for gated Llama models).
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("WARNING: HF_TOKEN not found. You need to:")
    print("1. Request access to Llama models from Meta")
    print("2. Create a Hugging Face access token")
    print("3. Add it as a Space secret named 'HF_TOKEN'")
# Pick the compute device up front; half precision only makes sense on CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

try:
    # Authenticate with HF_TOKEN; remote-code execution stays disabled.
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,
        trust_remote_code=False,  # Security: Don't execute remote code
    )
    # Use fp16 on GPU to halve memory; fall back to fp32 on CPU.
    weight_dtype = torch.float16 if device == "cuda" else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,
        torch_dtype=weight_dtype,
        device_map="auto",
        trust_remote_code=False,  # Security: Don't execute remote code
        low_cpu_mem_usage=True,
    )
except Exception as e:
    # Typical causes: missing HF_TOKEN secret or no Meta access grant.
    model_loaded = False
    print(f"❌ Failed to load model: {e}")
    print("\nTo fix this:")
    print("1. Request access at: https://huggingface.co/meta-llama")
    print("2. Create token at: https://huggingface.co/settings/tokens")
    print("3. Add token to Space secrets as 'HF_TOKEN'")
else:
    model_loaded = True
    print(f"✅ Successfully loaded {MODEL_ID}")
def generate_response(
    message,
    history,
    max_tokens=512,
    temperature=0.1,
    top_p=0.95,
):
    """Generate response using official Llama model"""
    # Bail out with a user-facing hint when startup loading failed.
    if not model_loaded:
        return "⚠️ Model not loaded. Please set up HF_TOKEN and request Llama access from Meta."

    # Rebuild the conversation in the role/content format Llama expects.
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Render with the model's chat template, ending on the assistant cue.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize with a bounded context window and move tensors to the device.
    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Sample a continuation; pad with EOS since Llama has no pad token.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the prompt tokens so only the newly generated text is returned.
    prompt_len = encoded["input_ids"].shape[1]
    return tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
# Build the Gradio interface. Component creation order defines page layout.
with gr.Blocks(title="Official Llama Chat") as demo:
    # Header text; shows the live model id or a setup reminder.
    shown_model = MODEL_ID if model_loaded else "Not loaded - see setup instructions"
    gr.Markdown(f"""
# 🦙 Official Llama Model Chat
**IMPORTANT SECURITY NOTICE:**
- This uses ONLY official Llama models from Meta
- Never download models from unofficial sources
- Always verify URLs are from trusted domains
**Model**: {shown_model}
**Setup Required**:
1. Request access: [Meta Llama on Hugging Face](https://huggingface.co/meta-llama)
2. Create token: [Hugging Face Settings](https://huggingface.co/settings/tokens)
3. Add token to Space secrets as 'HF_TOKEN'
""")

    if not model_loaded:
        gr.Markdown("""
### ⚠️ Model Not Loaded
The model could not be loaded. This is usually because:
- You haven't added your HF_TOKEN to the Space secrets
- You haven't been granted access to Llama models by Meta
Please follow the setup instructions above.
""")

    chatbot = gr.Chatbot(height=500)

    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            lines=2,
            scale=4,
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    with gr.Accordion("Generation Settings", open=False):
        max_tokens = gr.Slider(minimum=50, maximum=2048, value=512, label="Max Tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top P")

    clear_btn = gr.Button("Clear Chat")

    # Canned prompts the user can click to prefill the message box.
    gr.Examples(
        examples=[
            "What are the key principles of secure coding?",
            "Explain the importance of using official software sources",
            "How can I verify if a download link is legitimate?",
        ],
        inputs=msg,
    )

    # --- Event handlers ---
    def queue_user_message(message, history):
        """Append the user's turn (reply pending) and clear the textbox."""
        return "", history + [[message, None]]

    def produce_bot_reply(history, max_tokens, temperature, top_p):
        """Fill in the assistant half of the newest chat turn."""
        if not history:
            return history
        latest = history[-1][0]
        reply = generate_response(
            latest,
            history[:-1],
            max_tokens,
            temperature,
            top_p,
        )
        history[-1][1] = reply
        return history

    def reset_chat():
        """Clear the chatbot display."""
        return None

    # Enter key and Send button share the same two-step pipeline:
    # echo the user message first, then stream in the model's answer.
    msg.submit(queue_user_message, [msg, chatbot], [msg, chatbot]).then(
        produce_bot_reply, [chatbot, max_tokens, temperature, top_p], chatbot
    )
    submit_btn.click(queue_user_message, [msg, chatbot], [msg, chatbot]).then(
        produce_bot_reply, [chatbot, max_tokens, temperature, top_p], chatbot
    )
    clear_btn.click(reset_chat, outputs=chatbot)

    gr.Markdown("""
---
### 🔒 Security Best Practices
1. **Only use official model sources** (meta-llama on Hugging Face)
2. **Never run code from untrusted sources**
3. **Verify all URLs before downloading**
4. **Use access tokens securely** (never share them)
5. **Report suspicious links** to the platform
### 📚 Official Resources
- [Meta AI](https://ai.meta.com/)
- [Official Llama Page](https://llama.meta.com/)
- [Hugging Face Meta-Llama](https://huggingface.co/meta-llama)
""")

if __name__ == "__main__":
    demo.launch()