Aadhavan12344 committed on
Commit
2e62f2c
·
verified ·
1 Parent(s): 4de0d44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -32
app.py CHANGED
@@ -1,45 +1,61 @@
 
 
 
1
  import gradio as gr
2
  import torch
3
- try:
4
- from transformers import pipeline, AutoTokenizer
5
- TRANSFORMERS_AVAILABLE = True
6
- except ImportError:
7
- TRANSFORMERS_AVAILABLE = False
8
- print("Transformers not installed - check requirements.txt")
9
-
10
- # Tiny test model first (500MB vs 13GB)
11
- model_name = "microsoft/DialoGPT-small"
12
-
13
- if TRANSFORMERS_AVAILABLE:
14
- print("Loading tokenizer...")
15
- tokenizer = AutoTokenizer.from_pretrained(model_name)
16
  tokenizer.pad_token = tokenizer.eos_token
17
-
18
- print("Loading pipeline...")
19
- pipe = pipeline("text-generation",
20
- model_name,
21
- device=-1,
22
- torch_dtype=torch.float32,
23
- trust_remote_code=False)
24
-
25
- print("✅ Model loaded!")
26
 
27
  def chat(message, history):
28
- if not TRANSFORMERS_AVAILABLE:
29
- return "❌ Install transformers first"
30
-
31
  try:
32
- inputs = tokenizer.encode(message, return_tensors="pt", max_length=256, truncation=True)
33
- outputs = pipe(inputs,
34
- max_new_tokens=100,
 
 
 
 
 
 
35
  temperature=0.7,
36
  do_sample=True,
37
  pad_token_id=tokenizer.eos_token_id)
38
 
39
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
40
- return response[len(message):].strip()
 
 
 
 
 
41
  except Exception as e:
42
  return f"Error: {str(e)}"
43
 
44
- demo = gr.ChatInterface(fn=chat, title="Bubble AI Test")
45
- demo.launch()
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
3
+
4
  import gradio as gr
5
  import torch
6
+ from transformers import pipeline, AutoTokenizer
7
+
8
+ print("=== BUBBLE AI STARTING ===")
9
+
10
+ # Your desired DeepHermes model (start small for testing)
11
+ model_name = "NousResearch/Hermes-3-Llama-3.1-8B" # 4.5GB, guaranteed CPU fit
12
+
13
+ print(f"Loading {model_name}...")
14
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
15
+ if tokenizer.pad_token is None:
 
 
 
16
  tokenizer.pad_token = tokenizer.eos_token
17
+
18
+ pipe = pipeline("text-generation",
19
+ model_name,
20
+ device=-1, # CPU only
21
+ torch_dtype=torch.float16,
22
+ trust_remote_code=True)
23
+
24
+ print("✅ Model loaded successfully!")
 
25
 
26
  def chat(message, history):
27
+ """Chat function - native Hermes engagement, no system prompt needed"""
 
 
28
  try:
29
+ # Tokenize input
30
+ inputs = tokenizer(message,
31
+ return_tensors="pt",
32
+ truncation=True,
33
+ max_length=512)
34
+
35
+ # Generate response
36
+ outputs = pipe(**inputs,
37
+ max_new_tokens=300,
38
  temperature=0.7,
39
  do_sample=True,
40
  pad_token_id=tokenizer.eos_token_id)
41
 
42
+ # Decode only new response
43
+ full_response = tokenizer.decode(outputs[0]["generated_ids"],
44
+ skip_special_tokens=True)
45
+ new_response = full_response[len(message):].strip()
46
+
47
+ return new_response
48
+
49
  except Exception as e:
50
  return f"Error: {str(e)}"
51
 
52
+ # Gradio ChatInterface (your existing UI unchanged)
53
+ demo = gr.ChatInterface(
54
+ fn=chat,
55
+ title="Bubble AI - DeepHermes Hermes-3",
56
+ description="Claude 4.5 Opus-level conversational AI for your platform",
57
+ theme="soft"
58
+ )
59
+
60
+ if __name__ == "__main__":
61
+ demo.launch()