"""Bubble AI — Gradio chat UI around the Hermes-3-Llama-3.1-8B text-generation pipeline."""
import os

# Must be set before the tokenizers library spawns worker threads.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import gradio as gr
import torch
from transformers import AutoTokenizer, pipeline

print("=== BUBBLE AI STARTING ===")

# Hermes-3-Llama-3.1-8B (already proven working)
model_name = "NousResearch/Hermes-3-Llama-3.1-8B"
print(f"Loading {model_name}...")

tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # Llama tokenizers ship without a pad token; reuse EOS so padding works.
    tokenizer.pad_token = tokenizer.eos_token

# BUG FIX: the original used dtype=torch.float16 together with device=-1 (CPU).
# Half precision is unsupported by most CPU kernels in transformers and either
# raises at generation time or is extremely slow — use float32 on CPU.
# Passing the already-loaded tokenizer avoids loading it a second time.
# NOTE(review): trust_remote_code executes code from the model repo; kept for
# backward compatibility, but confirm this model actually requires it.
pipe = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device=-1,  # CPU
    dtype=torch.float32,
    trust_remote_code=True,
)

print("✅ Model loaded successfully!")


def _to_messages(message, history):
    """Build the [{'role': ..., 'content': ...}] list for the chat template.

    Accepts both Gradio history formats: message dicts (type="messages")
    and legacy [user, assistant] pairs.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # gradio type="messages" format
            if turn.get("content"):
                messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # legacy (user, assistant) pair format
            user_msg, bot_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return messages


def chat(message, history):
    """Generate a reply to *message*, returning the text (or an error string).

    BUG FIX: the original ignored *history*, so every turn lost all prior
    context. The full conversation is now rendered through the model's chat
    template when one is available; otherwise the raw message is used as
    before.
    """
    try:
        if getattr(tokenizer, "chat_template", None):
            prompt = tokenizer.apply_chat_template(
                _to_messages(message, history),
                tokenize=False,
                add_generation_prompt=True,
            )
        else:
            prompt = message

        outputs = pipe(
            prompt,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            truncation=True,
            # Cleaner than manually stripping the echoed prompt: the pipeline
            # returns only the newly generated continuation.
            return_full_text=False,
        )

        # Handle pipeline output format
        if isinstance(outputs, list) and outputs:
            response = outputs[0]["generated_text"].strip()
        else:
            response = str(outputs).strip()

        return response or "Interesting, tell me more..."
    except Exception as e:
        # Surface the error in the chat window instead of crashing the UI.
        return f"Error: {str(e)}"


# Clean Gradio interface - no invalid parameters
demo = gr.ChatInterface(
    fn=chat,
    title="Bubble AI - Hermes 3",
    description="Claude 4.5 Opus-level conversational AI for your platform",
)

if __name__ == "__main__":
    demo.launch()