# Bubble / app.py
# (Hugging Face Space page artifacts — uploader: Aadhavan12344; commit
#  message "Update app.py"; revision 4d8fec8, verified.)
import os

# Must be set before transformers/tokenizers spins up its Rust worker pool,
# otherwise forked Space workers emit parallelism warnings.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch

print("=== BUBBLE AI STARTING ===")

# Hermes-3-Llama-3.1-8B (already proven working)
model_name = "NousResearch/Hermes-3-Llama-3.1-8B"
print(f"Loading {model_name}...")

tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # Llama-family tokenizers ship without a pad token; reuse EOS so
    # generation with padding does not error out.
    tokenizer.pad_token = tokenizer.eos_token

# Fixed pipeline loading (dtype instead of deprecated torch_dtype).
# NOTE(review): float16 combined with device=-1 (CPU) is unusual — many CPU
# kernels lack fp16 support and float32 is the safer CPU default; confirm
# this runs on the target hardware before relying on it.
pipe = pipeline(
    "text-generation",
    model_name,
    device=-1,  # CPU
    dtype=torch.float16,
    trust_remote_code=True,
)
print("✅ Model loaded successfully!")
def chat(message, history):
    """Generate a single reply to *message* via the text-generation pipeline.

    Args:
        message: The user's latest input string.
        history: Prior turns supplied by ``gr.ChatInterface`` — unused here,
            so every turn is answered statelessly.

    Returns:
        The model's completion as a string, a fallback prompt when the model
        produces nothing, or an ``"Error: ..."`` string if generation failed.
    """
    try:
        # TextGenerationPipeline accepts a raw prompt string directly.
        outputs = pipe(
            message,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            truncation=True,
        )
        # Normal pipeline output is [{"generated_text": ...}]; fall back to
        # str() for any unexpected shape.
        if isinstance(outputs, list) and outputs:
            full_response = outputs[0]["generated_text"]
        else:
            full_response = str(outputs)
        # The pipeline echoes the prompt at the start of generated_text;
        # drop it so only the completion is shown to the user.
        response = full_response.removeprefix(message).strip()
        return response or "Interesting, tell me more..."
    except Exception as e:
        # Best-effort boundary: surface the failure in the chat window
        # rather than crashing the Space worker.
        return f"Error: {str(e)}"
# Minimal Gradio chat UI wired to the chat() handler; only parameters that
# ChatInterface actually accepts are passed.
_ui_config = {
    "fn": chat,
    "title": "Bubble AI - Hermes 3",
    "description": "Claude 4.5 Opus-level conversational AI for your platform",
}
demo = gr.ChatInterface(**_ui_config)

if __name__ == "__main__":
    demo.launch()