stevafernandes committed on
Commit
fc77a06
·
verified ·
1 Parent(s): 78c60ea

Create appy.py

Browse files
Files changed (1) hide show
  1. appy.py +214 -0
appy.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import spaces
5
+ import os
6
+
7
# Available official Llama models (require access approval from Meta):
OFFICIAL_LLAMA_MODELS = {
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B-Instruct",
    "Llama-3.2-3B": "meta-llama/Llama-3.2-3B-Instruct",
    "Llama-3.1-8B": "meta-llama/Llama-3.1-8B-Instruct",
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Llama-3.1-405B": "meta-llama/Llama-3.1-405B-Instruct",  # Requires massive GPU resources
}

# Select your model (start with smaller ones for testing).
# FIX: the original indexed OFFICIAL_LLAMA_MODELS["Llama-3.2-8B"], which is
# not a key in the dict above, so the script crashed with KeyError before the
# UI ever started. The 8B instruct model in the catalog is the Llama-3.1 one.
MODEL_ID = OFFICIAL_LLAMA_MODELS["Llama-3.1-8B"]

print(f"Loading official Llama model: {MODEL_ID}")
print("Note: This requires approval from Meta. Request access at:")
print(f"https://huggingface.co/{MODEL_ID}")

# Check for Hugging Face token (required for the gated Llama repos).
# A missing token is only warned about here; the actual failure surfaces in
# the load step below, which sets model_loaded accordingly.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("WARNING: HF_TOKEN not found. You need to:")
    print("1. Request access to Llama models from Meta")
    print("2. Create a Hugging Face access token")
    print("3. Add it as a Space secret named 'HF_TOKEN'")

# Prefer GPU when available; falling back to "cpu" keeps the script runnable
# (if slow) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
33
+
34
# Load the tokenizer and model once at import time so requests are fast and
# the UI can show a clear setup message when loading fails. model_loaded is
# the flag the rest of the file consults before running inference.
try:
    # Load tokenizer with authentication
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,  # gated repo: needs an approved HF access token
        trust_remote_code=False  # Security: Don't execute remote code
    )

    # Load model with authentication
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,
        # fp16 halves GPU memory; use fp32 on CPU where fp16 is poorly supported
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto",  # let accelerate place weights on available devices
        trust_remote_code=False,  # Security: Don't execute remote code
        low_cpu_mem_usage=True  # stream weights in to reduce peak RAM during load
    )

    model_loaded = True
    print(f"✅ Successfully loaded {MODEL_ID}")

except Exception as e:
    # Broad catch is deliberate: any failure (missing token, no access grant,
    # OOM) should leave the app running in "not loaded" mode instead of
    # crashing the Space at startup.
    model_loaded = False
    print(f"❌ Failed to load model: {e}")
    print("\nTo fix this:")
    print("1. Request access at: https://huggingface.co/meta-llama")
    print("2. Create token at: https://huggingface.co/settings/tokens")
    print("3. Add token to Space secrets as 'HF_TOKEN'")
62
+
63
@spaces.GPU(duration=60)
def generate_response(
    message,
    history,
    max_tokens=512,
    temperature=0.1,
    top_p=0.95,
):
    """Produce one assistant reply for *message* given the chat *history*.

    history is a list of [user, assistant] pairs (tuple-style Gradio chat
    format); the assistant entry may be None for an unanswered turn. Returns
    the decoded completion text, or a setup warning when no model is loaded.
    """
    # Guard: the module-level load may have failed (no token / no access).
    if not model_loaded:
        return "⚠️ Model not loaded. Please set up HF_TOKEN and request Llama access from Meta."

    # Rebuild the conversation in the role/content structure that the
    # tokenizer's chat template expects, then append the new user turn.
    conversation = []
    for past_user, past_reply in history:
        conversation.append({"role": "user", "content": past_user})
        if past_reply:
            conversation.append({"role": "assistant", "content": past_reply})
    conversation.append({"role": "user", "content": message})

    # Render the Llama prompt (adds the generation header for the assistant).
    prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize, capping the prompt length, and move tensors to the device.
    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Sample a completion; inference only, so gradients are disabled.
    with torch.no_grad():
        generated = tokenizer_free_generate = model.generate(
            **encoded,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            # Llama has no dedicated pad token, so reuse EOS for padding.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the prompt tokens so only the newly generated text is returned.
    prompt_length = encoded["input_ids"].shape[1]
    return tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)
110
+
111
# Create Gradio interface.
# All components and handlers are declared inside this Blocks context; the
# nested functions close over the component variables defined here.
with gr.Blocks(title="Official Llama Chat") as demo:
    # Header: security notice plus setup steps; shows the model id only when
    # the module-level load above succeeded.
    gr.Markdown("""
    # 🦙 Official Llama Model Chat

    **IMPORTANT SECURITY NOTICE:**
    - This uses ONLY official Llama models from Meta
    - Never download models from unofficial sources
    - Always verify URLs are from trusted domains

    **Model**: {model_name}

    **Setup Required**:
    1. Request access: [Meta Llama on Hugging Face](https://huggingface.co/meta-llama)
    2. Create token: [Hugging Face Settings](https://huggingface.co/settings/tokens)
    3. Add token to Space secrets as 'HF_TOKEN'
    """.format(model_name=MODEL_ID if model_loaded else "Not loaded - see setup instructions"))

    # Extra banner rendered only when the model failed to load at startup.
    if not model_loaded:
        gr.Markdown("""
        ### ⚠️ Model Not Loaded

        The model could not be loaded. This is usually because:
        - You haven't added your HF_TOKEN to the Space secrets
        - You haven't been granted access to Llama models by Meta

        Please follow the setup instructions above.
        """)

    # Chat history component; state is a list of [user, assistant] pairs
    # (tuple-style history — the format generate_response expects).
    chatbot = gr.Chatbot(height=500)

    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Sampling controls passed straight through to generate_response.
    # NOTE(review): the slider default temperature (0.7) differs from the
    # function default (0.1); the slider value is what actually gets used.
    with gr.Accordion("Generation Settings", open=False):
        max_tokens = gr.Slider(minimum=50, maximum=2048, value=512, label="Max Tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top P")

    clear_btn = gr.Button("Clear Chat")

    # Example prompts (clicking one fills the message box).
    gr.Examples(
        examples=[
            "What are the key principles of secure coding?",
            "Explain the importance of using official software sources",
            "How can I verify if a download link is legitimate?",
        ],
        inputs=msg,
    )

    # Event handlers

    def user_submit(message, history):
        # Clear the textbox and append the user's turn with a pending reply.
        return "", history + [[message, None]]

    def bot_response(history, max_tokens, temperature, top_p):
        # Fill in the assistant half of the last turn appended by user_submit.
        if not history:
            return history

        message = history[-1][0]
        bot_message = generate_response(
            message,
            history[:-1],  # prior turns only; the new message is passed separately
            max_tokens,
            temperature,
            top_p
        )
        history[-1][1] = bot_message
        return history

    # Enter key and Send button trigger the same two-step chain:
    # append the user turn, then generate the assistant reply.
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response, [chatbot, max_tokens, temperature, top_p], chatbot
    )

    submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response, [chatbot, max_tokens, temperature, top_p], chatbot
    )

    # Returning None resets the chatbot component to an empty history.
    clear_btn.click(lambda: None, outputs=chatbot)

    gr.Markdown("""
    ---
    ### 🔒 Security Best Practices

    1. **Only use official model sources** (meta-llama on Hugging Face)
    2. **Never run code from untrusted sources**
    3. **Verify all URLs before downloading**
    4. **Use access tokens securely** (never share them)
    5. **Report suspicious links** to the platform

    ### 📚 Official Resources
    - [Meta AI](https://ai.meta.com/)
    - [Official Llama Page](https://llama.meta.com/)
    - [Hugging Face Meta-Llama](https://huggingface.co/meta-llama)
    """)

# Standard script entry point: launch the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()