# appy.py — Gradio Space: chat UI for official Meta Llama models via transformers.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
import os
# Available official Llama models (require access approval from Meta):
OFFICIAL_LLAMA_MODELS = {
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B-Instruct",
    "Llama-3.2-3B": "meta-llama/Llama-3.2-3B-Instruct",
    "Llama-3.1-8B": "meta-llama/Llama-3.1-8B-Instruct",
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Llama-3.1-405B": "meta-llama/Llama-3.1-405B-Instruct",  # Requires massive GPU resources
}

# Select your model (start with smaller ones for testing).
# BUG FIX: the original looked up "Llama-3.2-8B", a key that does not exist in
# OFFICIAL_LLAMA_MODELS, so the script crashed with KeyError at import time.
# "Llama-3.1-8B" is the actual 8B entry in the table.
MODEL_ID = OFFICIAL_LLAMA_MODELS["Llama-3.1-8B"]

print(f"Loading official Llama model: {MODEL_ID}")
print("Note: This requires approval from Meta. Request access at:")
print(f"https://huggingface.co/{MODEL_ID}")

# Check for Hugging Face token (required for the gated meta-llama repos).
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("WARNING: HF_TOKEN not found. You need to:")
    print("1. Request access to Llama models from Meta")
    print("2. Create a Hugging Face access token")
    print("3. Add it as a Space secret named 'HF_TOKEN'")

# Prefer GPU when available; fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
# ---------------------------------------------------------------------------
# Load the tokenizer and model. Both calls authenticate with HF_TOKEN because
# the meta-llama repositories are gated. A failure is reported to the console
# instead of raised, so the UI can still start and show setup instructions.
# ---------------------------------------------------------------------------
_auth_kwargs = {
    "token": HF_TOKEN,
    "trust_remote_code": False,  # Security: Don't execute remote code
}
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **_auth_kwargs)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        # fp16 only makes sense on GPU; CPU inference stays in fp32.
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto",
        low_cpu_mem_usage=True,
        **_auth_kwargs,
    )
except Exception as e:
    model_loaded = False
    print(f"❌ Failed to load model: {e}")
    print("\nTo fix this:")
    print("1. Request access at: https://huggingface.co/meta-llama")
    print("2. Create token at: https://huggingface.co/settings/tokens")
    print("3. Add token to Space secrets as 'HF_TOKEN'")
else:
    # Reached only when both loads succeeded.
    model_loaded = True
    print(f"✅ Successfully loaded {MODEL_ID}")
@spaces.GPU(duration=60)
def generate_response(
    message,
    history,
    max_tokens=512,
    temperature=0.1,
    top_p=0.95,
):
    """Generate one assistant reply from the loaded official Llama model.

    Args:
        message: Current user message (str).
        history: Prior turns as (user, assistant) pairs; assistant may be None.
        max_tokens: Cap on newly generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold.

    Returns:
        The decoded model reply, or a setup warning when no model is loaded.
    """
    if not model_loaded:
        return "⚠️ Model not loaded. Please set up HF_TOKEN and request Llama access from Meta."

    # Rebuild the conversation in the role/content form the chat template expects.
    conversation = []
    for past_user, past_assistant in history:
        conversation.append({"role": "user", "content": past_user})
        if past_assistant:
            conversation.append({"role": "assistant", "content": past_assistant})
    conversation.append({"role": "user", "content": message})

    # Render with Llama's own chat template, ending on the assistant header.
    prompt_text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize (truncated to the context budget) and move tensors to the device.
    encoded = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=2048)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Sample a continuation; eos doubles as pad since Llama has no pad token.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tail, skipping the echoed prompt tokens.
    prompt_len = encoded["input_ids"].shape[1]
    return tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
# ---------------------------------------------------------------------------
# Gradio UI: header, chat window, sampling controls, and event wiring.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Official Llama Chat") as demo:
    # Header with security notice and setup steps; the model name is filled in
    # at build time (falls back to a hint if loading failed at startup).
    gr.Markdown("""
# 🦙 Official Llama Model Chat
**IMPORTANT SECURITY NOTICE:**
- This uses ONLY official Llama models from Meta
- Never download models from unofficial sources
- Always verify URLs are from trusted domains
**Model**: {model_name}
**Setup Required**:
1. Request access: [Meta Llama on Hugging Face](https://huggingface.co/meta-llama)
2. Create token: [Hugging Face Settings](https://huggingface.co/settings/tokens)
3. Add token to Space secrets as 'HF_TOKEN'
""".format(model_name=MODEL_ID if model_loaded else "Not loaded - see setup instructions"))

    # Extra troubleshooting banner shown only when startup loading failed.
    if not model_loaded:
        gr.Markdown("""
### ⚠️ Model Not Loaded
The model could not be loaded. This is usually because:
- You haven't added your HF_TOKEN to the Space secrets
- You haven't been granted access to Llama models by Meta
Please follow the setup instructions above.
""")

    # Chat transcript display; history is a list of [user, assistant] pairs.
    chatbot = gr.Chatbot(height=500)

    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Sampling controls forwarded to generate_response on every turn.
    with gr.Accordion("Generation Settings", open=False):
        max_tokens = gr.Slider(minimum=50, maximum=2048, value=512, label="Max Tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top P")

    clear_btn = gr.Button("Clear Chat")

    # Example prompts
    gr.Examples(
        examples=[
            "What are the key principles of secure coding?",
            "Explain the importance of using official software sources",
            "How can I verify if a download link is legitimate?",
        ],
        inputs=msg,
    )

    # Event handlers
    def user_submit(message, history):
        # Append the user's turn (assistant slot still None) and clear the box.
        return "", history + [[message, None]]

    def bot_response(history, max_tokens, temperature, top_p):
        # Fill the assistant slot of the newest history entry with a model reply.
        if not history:
            return history
        message = history[-1][0]
        bot_message = generate_response(
            message,
            history[:-1],
            max_tokens,
            temperature,
            top_p
        )
        history[-1][1] = bot_message
        return history

    # Enter key and Send button share the same two-step pipeline:
    # first record the user turn, then generate the assistant reply.
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response, [chatbot, max_tokens, temperature, top_p], chatbot
    )
    submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response, [chatbot, max_tokens, temperature, top_p], chatbot
    )
    # Sending None to the Chatbot component resets the transcript.
    clear_btn.click(lambda: None, outputs=chatbot)

    # Footer: security guidance and links to official resources.
    gr.Markdown("""
---
### 🔒 Security Best Practices
1. **Only use official model sources** (meta-llama on Hugging Face)
2. **Never run code from untrusted sources**
3. **Verify all URLs before downloading**
4. **Use access tokens securely** (never share them)
5. **Report suspicious links** to the platform
### 📚 Official Resources
- [Meta AI](https://ai.meta.com/)
- [Official Llama Page](https://llama.meta.com/)
- [Hugging Face Meta-Llama](https://huggingface.co/meta-llama)
""")
# Launch the app when run as a script (Hugging Face Spaces invokes this entry).
if __name__ == "__main__":
    demo.launch()