Spaces:

oscarrgkth
/

lab2

Build error

lab2 / app.py

gusreinaos

Add UI with default model for now

f3adf75 20 days ago

5.49 kB

	import gradio as gr
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download
	import os

	# Download a pre-made GGUF model from HuggingFace
	MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF"
	MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf"

	print("📥 Downloading model from HuggingFace...")
	model_path = hf_hub_download(
	repo_id=MODEL_NAME,
	filename=MODEL_FILE,
	local_dir="./models"
	)
	print(f"✅ Model downloaded to: {model_path}")

	print("🚀 Loading model...")
	llm = Llama(
	model_path=model_path,
	n_ctx=2048,
	n_threads=4,
	n_gpu_layers=0,
	verbose=False
	)
	print("✅ Model loaded!")

	def chat(message, history):
	prompt = "<\|begin_of_text\|>"

	for user_msg, bot_msg in history:
	prompt += f"<\|start_header_id\|>user<\|end_header_id\|>\n\n{user_msg}<\|eot_id\|>"
	prompt += f"<\|start_header_id\|>assistant<\|end_header_id\|>\n\n{bot_msg}<\|eot_id\|>"

	prompt += f"<\|start_header_id\|>user<\|end_header_id\|>\n\n{message}<\|eot_id\|>"
	prompt += "<\|start_header_id\|>assistant<\|end_header_id\|>\n\n"

	response = llm(
	prompt,
	max_tokens=512,
	temperature=0.7,
	top_p=0.9,
	stop=["<\|eot_id\|>", "<\|start_header_id\|>"],
	echo=False
	)

	return response['choices'][0]['text'].strip()

	# Ultra-modern CSS
	custom_css = """
	@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;600;700&display=swap');

	* {
	font-family: 'Space Grotesk', sans-serif !important;
	}

	.gradio-container {
	background: linear-gradient(135deg, #1e3a8a 0%, #7c3aed 50%, #db2777 100%) !important;
	}

	#chatbot {
	height: 650px !important;
	border-radius: 24px !important;
	border: 2px solid rgba(255,255,255,0.1) !important;
	box-shadow: 0 25px 50px -12px rgba(0,0,0,0.5) !important;
	}

	.message {
	padding: 18px 24px !important;
	border-radius: 20px !important;
	font-size: 15px !important;
	margin: 8px 0 !important;
	backdrop-filter: blur(10px) !important;
	box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37) !important;
	}

	.user {
	background: linear-gradient(135deg, rgba(147, 51, 234, 0.9) 0%, rgba(219, 39, 119, 0.9) 100%) !important;
	color: white !important;
	border: 1px solid rgba(255,255,255,0.2) !important;
	}

	.bot {
	background: linear-gradient(135deg, rgba(59, 130, 246, 0.9) 0%, rgba(147, 51, 234, 0.9) 100%) !important;
	color: white !important;
	border: 1px solid rgba(255,255,255,0.2) !important;
	}

	button {
	border-radius: 16px !important;
	font-weight: 600 !important;
	transition: all 0.3s ease !important;
	}

	button:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 12px 24px rgba(0,0,0,0.3) !important;
	}

	.primary {
	background: linear-gradient(135deg, #9333ea 0%, #db2777 100%) !important;
	border: none !important;
	}

	input, textarea {
	border-radius: 16px !important;
	border: 2px solid rgba(255,255,255,0.2) !important;
	background: rgba(255,255,255,0.1) !important;
	backdrop-filter: blur(10px) !important;
	color: white !important;
	}

	input::placeholder, textarea::placeholder {
	color: rgba(255,255,255,0.6) !important;
	}

	.prose {
	color: white !important;
	}

	.prose h1 {
	background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-weight: 700 !important;
	}

	footer {
	display: none !important;
	}
	"""

	with gr.Blocks(
	theme=gr.themes.Glass(
	primary_hue="purple",
	secondary_hue="pink",
	),
	css=custom_css,
	title="🦙 Llama 3.2 AI"
	) as demo:

	gr.Markdown(
	"""
	# 🦙 Llama Chat AI Assistant (TEST)
	### ⚡ Testing deployment with pre-trained model
	"""
	)

	chatbot = gr.Chatbot(
	elem_id="chatbot",
	bubble_full_width=False,
	avatar_images=(
	"https://em-content.zobj.net/thumbs/120/apple/354/sparkles_2728.png",
	"https://em-content.zobj.net/thumbs/120/apple/354/llama_1f999.png"
	),
	height=650,
	show_copy_button=True,
	likeable=True
	)

	with gr.Row():
	msg = gr.Textbox(
	placeholder="✨ Ask me anything...",
	show_label=False,
	scale=8,
	container=False
	)
	submit = gr.Button("Send 🚀", scale=1, variant="primary", size="lg")

	gr.Examples(
	examples=[
	"🌍 What is the capital of France?",
	"🧠 Explain quantum computing simply",
	"💻 Write fibonacci in Python",
	"😴 Tips for better sleep?",
	"🔢 Continue: 2, 4, 6, 8...",
	"📝 Write a haiku about AI",
	],
	inputs=msg,
	label="💡 Quick Start:"
	)

	with gr.Accordion("ℹ️ Model Information", open=False):
	gr.Markdown(
	"""
	Testing Model:
	- 🤖 Model: Llama 2 7B Chat (Pre-trained)
	- ⚙️ Format: GGUF (q4_k_m quantization)
	- 📝 Note: This is a test deployment. Will be replaced with fine-tuned model.
	"""
	)

	clear = gr.ClearButton([msg, chatbot], value="🗑️ New Chat")

	submit.click(chat, [msg, chatbot], [chatbot])
	submit.click(lambda: "", None, msg)
	msg.submit(chat, [msg, chatbot], [chatbot])
	msg.submit(lambda: "", None, msg)

	if __name__ == "__main__":
	demo.queue()
	demo.launch(share=False)