Aadhavan12344 committed on
Commit
4d8fec8
·
verified ·
1 Parent(s): c1c2f6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -24
app.py CHANGED
@@ -2,57 +2,62 @@ import os
2
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
3
 
4
  import gradio as gr
5
- import torch
6
  from transformers import pipeline, AutoTokenizer
 
7
 
8
  print("=== BUBBLE AI STARTING ===")
9
 
10
- # Your desired DeepHermes model (start small for testing)
11
- model_name = "NousResearch/Hermes-3-Llama-3.1-8B" # 4.5GB, guaranteed CPU fit
12
 
13
  print(f"Loading {model_name}...")
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
15
  if tokenizer.pad_token is None:
16
  tokenizer.pad_token = tokenizer.eos_token
17
 
 
18
  pipe = pipeline("text-generation",
19
  model_name,
20
- device=-1, # CPU only
21
- torch_dtype=torch.float16,
22
  trust_remote_code=True)
23
 
24
  print("✅ Model loaded successfully!")
25
 
26
  def chat(message, history):
27
- """Chat function - native Hermes engagement, no system prompt needed"""
28
  try:
29
- # Tokenize input
30
- inputs = tokenizer(message,
31
- return_tensors="pt",
32
- truncation=True,
33
- max_length=512)
 
 
 
 
34
 
35
- # Generate response
36
- outputs = pipe(**inputs,
37
- max_new_tokens=300,
38
- temperature=0.7,
39
- do_sample=True,
40
- pad_token_id=tokenizer.eos_token_id)
41
 
42
- # Decode only new response
43
- full_response = tokenizer.decode(outputs[0]["generated_ids"],
44
- skip_special_tokens=True)
45
- new_response = full_response[len(message):].strip()
 
46
 
47
- return new_response
48
 
49
  except Exception as e:
50
  return f"Error: {str(e)}"
51
 
52
- # Gradio ChatInterface (your existing UI unchanged)
53
  demo = gr.ChatInterface(
54
  fn=chat,
55
- title="Bubble AI - DeepHermes Hermes-3",
56
  description="Claude 4.5 Opus-level conversational AI for your platform"
57
  )
58
 
 
# Silence the HF tokenizers fork warning before transformers is imported.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch

print("=== BUBBLE AI STARTING ===")

# Hermes-3-Llama-3.1-8B (already proven working)
model_name = "NousResearch/Hermes-3-Llama-3.1-8B"

print(f"Loading {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Llama-family tokenizers ship without a pad token; generation needs one,
# so fall back to EOS (the standard choice for causal LMs).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): float16 is a poor fit for CPU inference — many PyTorch CPU
# kernels have no fp16 implementation, so generation can raise or crawl.
# bfloat16 has first-class CPU support at the same memory footprint, so use
# it when pinned to device=-1 (CPU).
pipe = pipeline("text-generation",
                model_name,
                device=-1,            # CPU only
                dtype=torch.bfloat16,
                trust_remote_code=True)

print("✅ Model loaded successfully!")
def chat(message, history):
    """Generate a single stateless reply to *message* with the Hermes pipeline.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list
        Gradio chat history (unused — each turn is answered statelessly).

    Returns
    -------
    str
        The model's reply, a canned fallback if the model produced nothing,
        or an ``"Error: ..."`` string if generation failed.
    """
    try:
        # return_full_text=False makes the pipeline return only the newly
        # generated continuation.  This is more robust than stripping the
        # prompt off the front with startswith(): tokenization can normalize
        # whitespace so the decoded text no longer prefix-matches the input,
        # which would echo the whole prompt back to the user.
        outputs = pipe(
            message,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            truncation=True,
            return_full_text=False,
        )

        # Normal shape is [{"generated_text": ...}]; fall back to str() for
        # anything unexpected rather than raising a KeyError/IndexError.
        if isinstance(outputs, list) and outputs:
            response = outputs[0]["generated_text"].strip()
        else:
            response = str(outputs).strip()

        return response or "Interesting, tell me more..."

    except Exception as e:
        # Surface the failure in the chat UI instead of crashing Gradio.
        return f"Error: {str(e)}"
 
57
+ # Clean Gradio interface - no invalid parameters
58
  demo = gr.ChatInterface(
59
  fn=chat,
60
+ title="Bubble AI - Hermes 3",
61
  description="Claude 4.5 Opus-level conversational AI for your platform"
62
  )
63