# Hugging Face Space page header (scrape residue, not code): "Spaces: Sleeping"
# --- Model setup ----------------------------------------------------------
import os

# Silence the fork-related warning HF tokenizers print under Gradio workers.
# Must be set before `transformers` is imported.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import gradio as gr
import torch
from transformers import AutoTokenizer, pipeline

print("=== BUBBLE AI STARTING ===")

# Hermes-3-Llama-3.1-8B (already proven working)
model_name = "NousResearch/Hermes-3-Llama-3.1-8B"
print(f"Loading {model_name}...")

tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # Llama-family tokenizers ship without a pad token; reuse EOS so any
    # padded generation call does not crash.
    tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): float16 on CPU (device=-1) halves memory for an 8B model but
# is slow, and some CPU ops lack half-precision kernels — confirm this
# combination actually runs on the target hardware.
pipe = pipeline(
    "text-generation",
    model_name,
    device=-1,  # CPU
    dtype=torch.float16,  # `dtype` replaces the deprecated `torch_dtype` kwarg
    trust_remote_code=True,
)
print("✅ Model loaded successfully!")
def chat(message, history):
    """Generate one reply for the Gradio ChatInterface.

    NOTE(review): `history` is accepted (ChatInterface passes it) but never
    used, so the model sees each message without conversation context —
    confirm this is intentional.

    Args:
        message: The user's latest message as a plain string.
        history: Prior turns supplied by Gradio (ignored).

    Returns:
        The model's reply text, a fallback line when generation comes back
        empty, or an "Error: ..." string if the pipeline raises.
    """
    try:
        # The text-generation pipeline takes the raw prompt string directly.
        outputs = pipe(
            message,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            truncation=True,
        )

        # Normalise the pipeline's output to a single string.
        if isinstance(outputs, list) and outputs:
            full_response = outputs[0]["generated_text"]
        else:
            full_response = str(outputs)

        # The pipeline echoes the prompt by default; drop it when present.
        reply = full_response
        if reply.startswith(message):
            reply = reply[len(message):]
        reply = reply.strip()

        return reply or "Interesting, tell me more..."
    except Exception as exc:
        # Surface failures in the chat window rather than crashing the UI.
        return f"Error: {str(exc)}"
# --- Gradio UI ------------------------------------------------------------
# Minimal ChatInterface wiring: only parameters accepted across Gradio
# versions are passed, so the Space boots regardless of the pinned release.
demo = gr.ChatInterface(
    fn=chat,
    title="Bubble AI - Hermes 3",
    description="Claude 4.5 Opus-level conversational AI for your platform",
)

if __name__ == "__main__":
    demo.launch()