Aadhavan12344 committed on
Commit
4d8fec8
·
verified ·
1 Parent(s): c1c2f6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -24
app.py CHANGED
@@ -2,57 +2,62 @@ import os
2
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
3
 
4
  import gradio as gr
5
- import torch
6
  from transformers import pipeline, AutoTokenizer
 
7
 
8
  print("=== BUBBLE AI STARTING ===")
9
 
10
- # Your desired DeepHermes model (start small for testing)
11
- model_name = "NousResearch/Hermes-3-Llama-3.1-8B" # 4.5GB, guaranteed CPU fit
12
 
13
  print(f"Loading {model_name}...")
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
15
  if tokenizer.pad_token is None:
16
  tokenizer.pad_token = tokenizer.eos_token
17
 
 
18
  pipe = pipeline("text-generation",
19
  model_name,
20
- device=-1, # CPU only
21
- torch_dtype=torch.float16,
22
  trust_remote_code=True)
23
 
24
  print("✅ Model loaded successfully!")
25
 
26
  def chat(message, history):
27
- """Chat function - native Hermes engagement, no system prompt needed"""
28
  try:
29
- # Tokenize input
30
- inputs = tokenizer(message,
31
- return_tensors="pt",
32
- truncation=True,
33
- max_length=512)
 
 
 
 
34
 
35
- # Generate response
36
- outputs = pipe(**inputs,
37
- max_new_tokens=300,
38
- temperature=0.7,
39
- do_sample=True,
40
- pad_token_id=tokenizer.eos_token_id)
41
 
42
- # Decode only new response
43
- full_response = tokenizer.decode(outputs[0]["generated_ids"],
44
- skip_special_tokens=True)
45
- new_response = full_response[len(message):].strip()
 
46
 
47
- return new_response
48
 
49
  except Exception as e:
50
  return f"Error: {str(e)}"
51
 
52
- # Gradio ChatInterface (your existing UI unchanged)
53
  demo = gr.ChatInterface(
54
  fn=chat,
55
- title="Bubble AI - DeepHermes Hermes-3",
56
  description="Claude 4.5 Opus-level conversational AI for your platform"
57
  )
58
 
 
# Silence the HF tokenizers fork warning before transformers is imported.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch

print("=== BUBBLE AI STARTING ===")

# Hermes-3-Llama-3.1-8B (already proven working)
model_name = "NousResearch/Hermes-3-Llama-3.1-8B"

print(f"Loading {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Llama-family tokenizers ship without a pad token; generation needs one,
# so fall back to EOS (the standard choice for causal LMs).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): float16 is a poor fit for CPU inference — many PyTorch CPU
# kernels have no fp16 implementation, so generation can raise or crawl.
# bfloat16 has first-class CPU support at the same memory footprint, so use
# it when pinned to device=-1 (CPU).
pipe = pipeline("text-generation",
                model_name,
                device=-1,            # CPU only
                dtype=torch.bfloat16,
                trust_remote_code=True)

print("✅ Model loaded successfully!")
def chat(message, history):
    """Generate a single stateless reply to *message* with the Hermes pipeline.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list
        Gradio chat history (unused — each turn is answered statelessly).

    Returns
    -------
    str
        The model's reply, a canned fallback if the model produced nothing,
        or an ``"Error: ..."`` string if generation failed.
    """
    try:
        # return_full_text=False makes the pipeline return only the newly
        # generated continuation.  This is more robust than stripping the
        # prompt off the front with startswith(): tokenization can normalize
        # whitespace so the decoded text no longer prefix-matches the input,
        # which would echo the whole prompt back to the user.
        outputs = pipe(
            message,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            truncation=True,
            return_full_text=False,
        )

        # Normal shape is [{"generated_text": ...}]; fall back to str() for
        # anything unexpected rather than raising a KeyError/IndexError.
        if isinstance(outputs, list) and outputs:
            response = outputs[0]["generated_text"].strip()
        else:
            response = str(outputs).strip()

        return response or "Interesting, tell me more..."

    except Exception as e:
        # Surface the failure in the chat UI instead of crashing Gradio.
        return f"Error: {str(e)}"
 
57
+ # Clean Gradio interface - no invalid parameters
58
  demo = gr.ChatInterface(
59
  fn=chat,
60
+ title="Bubble AI - Hermes 3",
61
  description="Claude 4.5 Opus-level conversational AI for your platform"
62
  )
63