# app-fhancs-12 / app.py
# Deploy Gradio app with multiple files
# commit d796a40 (verified)
import gradio as gr
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
pipeline
)
import spaces
import time
import os
# ---------------------------------------------------------------------------
# Model configuration
# ---------------------------------------------------------------------------
# DialoGPT-medium has ~1.5B parameters — close to the advertised 2B.
# Larger alternatives worth trying:
#   * "microsoft/Phi-2"         (2.7B — requires special handling)
#   * "EleutherAI/gpt-neo-2.7B" (2.7B parameters)
MODEL_NAME = "microsoft/DialoGPT-medium"

# Lazily-populated module-level state, shared across requests.
tokenizer = None
model = None
chat_history = []
def load_model():
    """Lazily initialize the shared tokenizer/model pair and return both.

    Subsequent calls are cheap no-ops: if both globals are already
    populated, the cached objects are returned immediately.
    """
    global tokenizer, model

    # Fast path: everything is already loaded.
    if tokenizer is not None and model is not None:
        return tokenizer, model

    print("Loading model and tokenizer...")

    # Tokenizer first; left-padding suits decoder-only generation.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
    if tokenizer.pad_token is None:
        # DialoGPT ships without a pad token — reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token

    # Model second, in float32 for CPU compatibility.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )

    print(f"Model {MODEL_NAME} loaded successfully!")
    return tokenizer, model
@spaces.GPU(duration=120)  # Use GPU if available, with 2-minute timeout
def generate_response(user_message, history=None):
    """
    Generate a reply to *user_message* given the prior conversation.

    Args:
        user_message (str): User's input message.
        history (list): Flat alternating list of earlier messages
            [user, assistant, user, assistant, ...]; defaults to empty.

    Returns:
        str: Generated assistant reply, or a canned fallback string if
        generation fails for any reason.
    """
    if history is None:
        history = []
    try:
        # Load model if not already loaded.
        load_model()

        # Work on a copy so the caller's list is never mutated.
        # (Renamed from `chat_history`, which shadowed the module-level
        # global of the same name and invited confusion.)
        turns = history.copy()
        turns.append(user_message)

        # Rebuild the dialogue as alternating "Human:"/"Assistant:" lines;
        # even indices are assumed to be the human's turns.
        context = "\n".join(
            f"Human: {msg}" if i % 2 == 0 else f"Assistant: {msg}"
            for i, msg in enumerate(turns)
        )
        context += "\nAssistant:"

        # Tokenize via __call__ so we also get an attention mask —
        # encode() alone omits it, which makes generation unreliable
        # when the pad token is reused as EOS (as it is here).
        encoded = tokenizer(
            context, return_tensors="pt", max_length=1024, truncation=True
        )

        with torch.no_grad():
            outputs = model.generate(
                encoded.input_ids,
                attention_mask=encoded.attention_mask,
                # Same budget as the old `max_length = input_len + 100`,
                # stated directly as a new-token count.
                max_new_tokens=100,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                # Stop as soon as the model starts a new "Human" turn.
                eos_token_id=tokenizer.encode("Human")[0],
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the text after the final "Assistant:" marker.
        if "Assistant:" in response:
            response = response.split("Assistant:")[-1].strip()
        else:
            # Fallback: strip the prompt prefix and keep the remainder.
            response = response[len(context):].strip()

        # First line only, and drop any echoed "Human:" turn.
        response = response.split("\n")[0].strip()
        if "Human:" in response:
            response = response.split("Human:")[0].strip()

        # Never return an empty / degenerate reply.
        if not response or len(response.strip()) < 2:
            response = "I'm here to chat! What would you like to talk about?"
        return response

    except Exception as e:
        print(f"Error generating response: {e}")
        return "I apologize, but I'm having trouble generating a response right now. Please try again!"
def chat_interface(message, history):
    """Handle one chat turn: append the user message and model reply.

    Args:
        message (str | None): Raw textbox contents; blank input is a no-op.
        history (list | None): Flat alternating list [user, bot, ...].

    Returns:
        tuple[list, str]: Updated history and "" to clear the textbox.

    NOTE(review): history is kept as a flat alternating list, while
    gr.Chatbot conventionally expects (user, bot) pairs — confirm
    rendering against the Gradio version actually deployed.
    """
    # Guard against a None history (Gradio normally passes []), and
    # against None/blank messages, which previously raised on .strip().
    if history is None:
        history = []
    if not message or not message.strip():
        return history, ""

    # Generate response.
    response = generate_response(message, history)

    # Record both sides of the exchange.
    history.append(message)
    history.append(response)

    # Keep history manageable (last 10 exchanges = 20 entries).
    if len(history) > 20:
        history = history[-20:]
    return history, ""
def clear_chat():
    """Reset the conversation: hand Gradio a brand-new empty history."""
    fresh_history = []
    return fresh_history
# Create the Gradio interface
def create_demo():
    """Assemble the Gradio Blocks UI and return it (not yet launched)."""
    # Page-wide CSS: narrow centered layout plus header / info-card styles.
    css = """
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .header {
        text-align: center;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .model-info {
        text-align: center;
        padding: 10px;
        background-color: #f0f2f6;
        border-radius: 5px;
        margin-bottom: 20px;
        font-size: 0.9em;
    }
    """

    # Static markup hoisted out of the layout for readability.
    header_html = """
    <div class="header">
        <h1>🤖 Free 2B Parameter Chatbot</h1>
        <p>Chat with a 2B parameter AI model for free! Fast responses, unlimited chat.</p>
        <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: white; text-decoration: underline;">Built with anycoder</a></p>
    </div>
    """
    model_info_html = f"""
    <div class="model-info">
        <strong>Model:</strong> {MODEL_NAME} (1.5B parameters)<br>
        <strong>Type:</strong> Conversational AI<br>
        <strong>Powered by:</strong> Hugging Face Transformers
    </div>
    """
    example_prompts = [
        "Hello! How are you today?",
        "Tell me a joke",
        "What's the weather like?",
        "Can you help me with coding?",
        "What's your favorite movie?",
        "Explain quantum physics",
        "Tell me about space exploration",
        "Write a short poem about AI",
    ]

    with gr.Blocks(css=css, title="Free 2B Parameter Chatbot") as demo:
        gr.HTML(header_html)
        gr.HTML(model_info_html)

        # Conversation display.
        chatbot = gr.Chatbot(
            label="Chat with AI",
            height=600,
            bubble_full_width=False,
            avatar_images=(None, None),
        )

        # Input row and action buttons.
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your message here...",
            scale=4,
        )
        with gr.Row():
            send_btn = gr.Button("Send", variant="primary", scale=1)
            clear_btn = gr.Button("Clear Chat", variant="secondary", scale=1)

        gr.Examples(
            examples=example_prompts,
            inputs=msg,
            label="Example prompts to get started",
        )

        # Enter key and the Send button share one handler.
        for trigger in (msg.submit, send_btn.click):
            trigger(
                chat_interface,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg],
            )
        clear_btn.click(clear_chat, outputs=chatbot)

    return demo
if __name__ == "__main__":
    # Build the UI, then serve it with settings suited to HF Spaces.
    demo = create_demo()
    launch_options = {
        "share": False,       # Spaces provides the public URL itself
        "inbrowser": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
        "quiet": True,
    }
    demo.launch(**launch_options)