File size: 1,944 Bytes
2e62f2c
 
 
2b6edf4
2e62f2c
4d8fec8
2e62f2c
 
 
4d8fec8
 
2e62f2c
 
 
 
600d201
2e62f2c
4d8fec8
2e62f2c
 
4d8fec8
 
2e62f2c
 
 
2b6edf4
 
4d8fec8
600d201
4d8fec8
 
 
 
 
 
 
 
 
2e62f2c
4d8fec8
 
 
 
 
600d201
4d8fec8
 
 
 
 
2e62f2c
4d8fec8
2e62f2c
600d201
 
2b6edf4
4d8fec8
2e62f2c
 
4d8fec8
c1c2f6f
2e62f2c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
# Must be set before the tokenizer is used: silences the HF tokenizers
# fork/parallelism warning in server processes.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch

print("=== BUBBLE AI STARTING ===")

# Hermes-3-Llama-3.1-8B (already proven working)
model_name = "NousResearch/Hermes-3-Llama-3.1-8B"

print(f"Loading {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Llama-family tokenizers ship without a pad token; fall back to EOS so
# generation with padding/truncation doesn't fail.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Fixed pipeline loading (dtype instead of deprecated torch_dtype)
# NOTE(review): dtype=torch.float16 with device=-1 runs half precision on
# CPU, which is poorly supported/slow for many ops — confirm this is
# intentional (float32 is the usual CPU choice).
# NOTE(review): trust_remote_code=True executes model-repo code at load
# time; acceptable only because the repo is a known, pinned source.
pipe = pipeline("text-generation", 
                model_name,
                device=-1,  # CPU
                dtype=torch.float16,
                trust_remote_code=True)

print("✅ Model loaded successfully!")

def chat(message, history):
    """Fixed chat function - handles pipeline correctly"""
    try:
        # TextGenerationPipeline expects raw text string directly
        outputs = pipe(
            message,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            truncation=True
        )
        
        # Handle pipeline output format
        if isinstance(outputs, list) and len(outputs) > 0:
            full_response = outputs[0]["generated_text"]
        else:
            full_response = str(outputs)
        
        # Remove echoed input if model repeats message
        if full_response.startswith(message):
            response = full_response[len(message):].strip()
        else:
            response = full_response.strip()
        
        return response or "Interesting, tell me more..."
        
    except Exception as e:
        return f"Error: {str(e)}"

# Clean Gradio interface - no invalid parameters.
# ChatInterface calls `chat(message, history)` on every user turn and
# renders the returned string as the bot reply.
demo = gr.ChatInterface(
    fn=chat,
    title="Bubble AI - Hermes 3",
    description="Claude 4.5 Opus-level conversational AI for your platform"
)

# Launch the web server only when run as a script (not on import, e.g.
# under a Spaces/ASGI runner that calls demo itself).
if __name__ == "__main__":
    demo.launch()