# app-fhancs-12 / app.py
# Deploy Gradio app with multiple files
# commit d796a40 (verified)
import gradio as gr
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
pipeline
)
import spaces
import time
import os
# ---------------------------------------------------------------------------
# Model configuration
# ---------------------------------------------------------------------------
# DialoGPT-medium has ~1.5B parameters — close to the advertised 2B.
# Larger alternatives worth trying:
#   * "microsoft/Phi-2"         (2.7B — requires special handling)
#   * "EleutherAI/gpt-neo-2.7B" (2.7B parameters)
MODEL_NAME = "microsoft/DialoGPT-medium"

# Lazily-populated module-level state, shared across requests.
tokenizer = None
model = None
chat_history = []
def load_model():
    """Lazily initialize the shared tokenizer/model pair and return both.

    Subsequent calls are cheap no-ops: if both globals are already
    populated, the cached objects are returned immediately.
    """
    global tokenizer, model

    # Fast path: everything is already loaded.
    if tokenizer is not None and model is not None:
        return tokenizer, model

    print("Loading model and tokenizer...")

    # Tokenizer first; left-padding suits decoder-only generation.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
    if tokenizer.pad_token is None:
        # DialoGPT ships without a pad token — reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token

    # Model second, in float32 for CPU compatibility.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )

    print(f"Model {MODEL_NAME} loaded successfully!")
    return tokenizer, model
@spaces.GPU(duration=120)  # Use GPU if available, with 2-minute timeout
def generate_response(user_message, history=None):
    """
    Generate a reply to *user_message* given the prior conversation.

    Args:
        user_message (str): User's input message.
        history (list): Flat alternating list of earlier messages
            [user, assistant, user, assistant, ...]; defaults to empty.

    Returns:
        str: Generated assistant reply, or a canned fallback string if
        generation fails for any reason.
    """
    if history is None:
        history = []
    try:
        # Load model if not already loaded.
        load_model()

        # Work on a copy so the caller's list is never mutated.
        # (Renamed from `chat_history`, which shadowed the module-level
        # global of the same name and invited confusion.)
        turns = history.copy()
        turns.append(user_message)

        # Rebuild the dialogue as alternating "Human:"/"Assistant:" lines;
        # even indices are assumed to be the human's turns.
        context = "\n".join(
            f"Human: {msg}" if i % 2 == 0 else f"Assistant: {msg}"
            for i, msg in enumerate(turns)
        )
        context += "\nAssistant:"

        # Tokenize via __call__ so we also get an attention mask —
        # encode() alone omits it, which makes generation unreliable
        # when the pad token is reused as EOS (as it is here).
        encoded = tokenizer(
            context, return_tensors="pt", max_length=1024, truncation=True
        )

        with torch.no_grad():
            outputs = model.generate(
                encoded.input_ids,
                attention_mask=encoded.attention_mask,
                # Same budget as the old `max_length = input_len + 100`,
                # stated directly as a new-token count.
                max_new_tokens=100,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                # Stop as soon as the model starts a new "Human" turn.
                eos_token_id=tokenizer.encode("Human")[0],
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the text after the final "Assistant:" marker.
        if "Assistant:" in response:
            response = response.split("Assistant:")[-1].strip()
        else:
            # Fallback: strip the prompt prefix and keep the remainder.
            response = response[len(context):].strip()

        # First line only, and drop any echoed "Human:" turn.
        response = response.split("\n")[0].strip()
        if "Human:" in response:
            response = response.split("Human:")[0].strip()

        # Never return an empty / degenerate reply.
        if not response or len(response.strip()) < 2:
            response = "I'm here to chat! What would you like to talk about?"
        return response

    except Exception as e:
        print(f"Error generating response: {e}")
        return "I apologize, but I'm having trouble generating a response right now. Please try again!"
def chat_interface(message, history):
    """Handle one chat turn: append the user message and model reply.

    Args:
        message (str | None): Raw textbox contents; blank input is a no-op.
        history (list | None): Flat alternating list [user, bot, ...].

    Returns:
        tuple[list, str]: Updated history and "" to clear the textbox.

    NOTE(review): history is kept as a flat alternating list, while
    gr.Chatbot conventionally expects (user, bot) pairs — confirm
    rendering against the Gradio version actually deployed.
    """
    # Guard against a None history (Gradio normally passes []), and
    # against None/blank messages, which previously raised on .strip().
    if history is None:
        history = []
    if not message or not message.strip():
        return history, ""

    # Generate response.
    response = generate_response(message, history)

    # Record both sides of the exchange.
    history.append(message)
    history.append(response)

    # Keep history manageable (last 10 exchanges = 20 entries).
    if len(history) > 20:
        history = history[-20:]
    return history, ""
def clear_chat():
    """Reset the conversation: hand Gradio a brand-new empty history."""
    fresh_history = []
    return fresh_history
# Create the Gradio interface
def create_demo():
    """Assemble the Gradio Blocks UI and return it (not yet launched)."""
    # Page-wide CSS: narrow centered layout plus header / info-card styles.
    css = """
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .header {
        text-align: center;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .model-info {
        text-align: center;
        padding: 10px;
        background-color: #f0f2f6;
        border-radius: 5px;
        margin-bottom: 20px;
        font-size: 0.9em;
    }
    """

    # Static markup hoisted out of the layout for readability.
    header_html = """
    <div class="header">
        <h1>🤖 Free 2B Parameter Chatbot</h1>
        <p>Chat with a 2B parameter AI model for free! Fast responses, unlimited chat.</p>
        <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: white; text-decoration: underline;">Built with anycoder</a></p>
    </div>
    """
    model_info_html = f"""
    <div class="model-info">
        <strong>Model:</strong> {MODEL_NAME} (1.5B parameters)<br>
        <strong>Type:</strong> Conversational AI<br>
        <strong>Powered by:</strong> Hugging Face Transformers
    </div>
    """
    example_prompts = [
        "Hello! How are you today?",
        "Tell me a joke",
        "What's the weather like?",
        "Can you help me with coding?",
        "What's your favorite movie?",
        "Explain quantum physics",
        "Tell me about space exploration",
        "Write a short poem about AI",
    ]

    with gr.Blocks(css=css, title="Free 2B Parameter Chatbot") as demo:
        gr.HTML(header_html)
        gr.HTML(model_info_html)

        # Conversation display.
        chatbot = gr.Chatbot(
            label="Chat with AI",
            height=600,
            bubble_full_width=False,
            avatar_images=(None, None),
        )

        # Input row and action buttons.
        msg = gr.Textbox(
            label="Your message",
            placeholder="Type your message here...",
            scale=4,
        )
        with gr.Row():
            send_btn = gr.Button("Send", variant="primary", scale=1)
            clear_btn = gr.Button("Clear Chat", variant="secondary", scale=1)

        gr.Examples(
            examples=example_prompts,
            inputs=msg,
            label="Example prompts to get started",
        )

        # Enter key and the Send button share one handler.
        for trigger in (msg.submit, send_btn.click):
            trigger(
                chat_interface,
                inputs=[msg, chatbot],
                outputs=[chatbot, msg],
            )
        clear_btn.click(clear_chat, outputs=chatbot)

    return demo
if __name__ == "__main__":
    # Build the UI, then serve it with settings suited to HF Spaces.
    demo = create_demo()
    launch_options = {
        "share": False,       # Spaces provides the public URL itself
        "inbrowser": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
        "quiet": True,
    }
    demo.launch(**launch_options)