# Hugging Face Space app — official Llama chat demo (scraped page header removed)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
import os
# Available official Llama models (require access approval from Meta):
OFFICIAL_LLAMA_MODELS = {
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B-Instruct",
    "Llama-3.2-3B": "meta-llama/Llama-3.2-3B-Instruct",
    "Llama-3.1-8B": "meta-llama/Llama-3.1-8B-Instruct",
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Llama-3.1-405B": "meta-llama/Llama-3.1-405B-Instruct",  # Requires massive GPU resources
}

# Select your model (start with smaller ones for testing).
# BUG FIX: the original indexed "Llama-3.2-8B", a key that does not exist in
# OFFICIAL_LLAMA_MODELS, so the app crashed with KeyError at import time.
# The 8B instruct model belongs to the Llama-3.1 family.
MODEL_ID = OFFICIAL_LLAMA_MODELS["Llama-3.1-8B"]

print(f"Loading official Llama model: {MODEL_ID}")
print("Note: This requires approval from Meta. Request access at:")
print(f"https://huggingface.co/{MODEL_ID}")

# Check for Hugging Face token (required for the gated meta-llama repos).
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("WARNING: HF_TOKEN not found. You need to:")
    print("1. Request access to Llama models from Meta")
    print("2. Create a Hugging Face access token")
    print("3. Add it as a Space secret named 'HF_TOKEN'")
# Prefer GPU when available; otherwise fall back to CPU-only inference.
device = "cuda" if torch.cuda.is_available() else "cpu"

try:
    # Load tokenizer with authentication (meta-llama repos are gated and
    # require an approved HF_TOKEN).
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,
        trust_remote_code=False  # Security: Don't execute remote code
    )
    # Load model with authentication
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,
        # Half precision only on GPU; CPUs are typically slower/unsupported at fp16.
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto",  # let accelerate place weights on available devices
        trust_remote_code=False,  # Security: Don't execute remote code
        low_cpu_mem_usage=True
    )
    model_loaded = True
    print(f"✅ Successfully loaded {MODEL_ID}")
except Exception as e:
    # Broad catch is deliberate: any load failure (missing token, no Meta
    # access grant, OOM, network error) should leave the Space running with
    # model_loaded = False so the UI can show setup instructions instead of
    # crashing at startup.
    model_loaded = False
    print(f"❌ Failed to load model: {e}")
    print("\nTo fix this:")
    print("1. Request access at: https://huggingface.co/meta-llama")
    print("2. Create token at: https://huggingface.co/settings/tokens")
    print("3. Add token to Space secrets as 'HF_TOKEN'")
@spaces.GPU(duration=60)
def generate_response(
    message,
    history,
    max_tokens=512,
    temperature=0.1,
    top_p=0.95,
):
    """Run one chat turn through the loaded Llama model.

    ``history`` is a list of (user, assistant) pairs from the Gradio chatbot;
    ``message`` is appended as the final user turn. Returns the decoded
    assistant reply, or a setup warning string when the model is unavailable.
    """
    if not model_loaded:
        return "⚠️ Model not loaded. Please set up HF_TOKEN and request Llama access from Meta."

    # Rebuild the conversation in the role/content format the chat template expects.
    conversation = []
    for past_user, past_assistant in history:
        conversation.append({"role": "user", "content": past_user})
        if past_assistant:
            conversation.append({"role": "assistant", "content": past_assistant})
    conversation.append({"role": "user", "content": message})

    # Render with Llama's chat template, leaving the assistant turn open.
    prompt_text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize (capped at 2048 prompt tokens) and move tensors to the device.
    encoded = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=2048)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Slice off the prompt tokens so only the newly generated reply is decoded.
    prompt_length = encoded["input_ids"].shape[1]
    return tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)
# Create Gradio interface
with gr.Blocks(title="Official Llama Chat") as demo:
    # Header: security notice, active model name, and setup checklist.
    gr.Markdown("""
# 🦙 Official Llama Model Chat
**IMPORTANT SECURITY NOTICE:**
- This uses ONLY official Llama models from Meta
- Never download models from unofficial sources
- Always verify URLs are from trusted domains
**Model**: {model_name}
**Setup Required**:
1. Request access: [Meta Llama on Hugging Face](https://huggingface.co/meta-llama)
2. Create token: [Hugging Face Settings](https://huggingface.co/settings/tokens)
3. Add token to Space secrets as 'HF_TOKEN'
""".format(model_name=MODEL_ID if model_loaded else "Not loaded - see setup instructions"))

    # Extra banner shown only when startup loading failed.
    if not model_loaded:
        gr.Markdown("""
### ⚠️ Model Not Loaded
The model could not be loaded. This is usually because:
- You haven't added your HF_TOKEN to the Space secrets
- You haven't been granted access to Llama models by Meta
Please follow the setup instructions above.
""")

    # Chat display plus the message-entry row.
    chatbot = gr.Chatbot(height=500)
    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Sampling controls (collapsed by default).
    with gr.Accordion("Generation Settings", open=False):
        max_tokens = gr.Slider(minimum=50, maximum=2048, value=512, label="Max Tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top P")

    clear_btn = gr.Button("Clear Chat")

    # Example prompts
    gr.Examples(
        examples=[
            "What are the key principles of secure coding?",
            "Explain the importance of using official software sources",
            "How can I verify if a download link is legitimate?",
        ],
        inputs=msg,
    )

    # Event handlers
    def user_submit(message, history):
        # Clear the textbox and append the user's turn with a pending (None) reply.
        return "", history + [[message, None]]

    def bot_response(history, max_tokens, temperature, top_p):
        # Fill in the assistant half of the most recent history pair.
        if not history:
            return history
        message = history[-1][0]
        bot_message = generate_response(
            message,
            history[:-1],  # prior completed turns only; last pair holds the new message
            max_tokens,
            temperature,
            top_p
        )
        history[-1][1] = bot_message
        return history

    # Enter key and Send button trigger the same two-step flow:
    # echo the user message first, then fill in the model's reply.
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response, [chatbot, max_tokens, temperature, top_p], chatbot
    )
    submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response, [chatbot, max_tokens, temperature, top_p], chatbot
    )
    # Returning None resets the gr.Chatbot display.
    clear_btn.click(lambda: None, outputs=chatbot)

    # Footer: security guidance and official links.
    gr.Markdown("""
---
### 🔒 Security Best Practices
1. **Only use official model sources** (meta-llama on Hugging Face)
2. **Never run code from untrusted sources**
3. **Verify all URLs before downloading**
4. **Use access tokens securely** (never share them)
5. **Report suspicious links** to the platform
### 📚 Official Resources
- [Meta AI](https://ai.meta.com/)
- [Official Llama Page](https://llama.meta.com/)
- [Hugging Face Meta-Llama](https://huggingface.co/meta-llama)
""")
# Launch the Gradio server when run directly (Spaces executes this file as a script).
if __name__ == "__main__":
    demo.launch()