OrbitMC committed on
Commit
0cfcb7c
Β·
verified Β·
1 Parent(s): 50f8981

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -121
app.py CHANGED
@@ -6,168 +6,142 @@ import threading
6
  import traceback
7
  import asyncio
8
  from pathlib import Path
9
- from flask import Flask, request, jsonify, Response, send_from_directory
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import edge_tts
13
 
14
- # ══════════════════════════════════════════════════════════════════
15
- # CONFIG
16
- # ══════════════════════════════════════════════════════════════════
17
  MAX_MEMORY = 20
18
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
19
  TTS_VOICE = "zh-CN-XiaoyiNeural"
20
- TTS_RATE = int(os.environ.get("TTS_RATE", "2"))
21
- TTS_PITCH = int(os.environ.get("TTS_PITCH", "8"))
22
  IMG_DIR = Path(__file__).parent / "img"
23
  MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
24
 
25
- # ══════════════════════════════════════════════════════════════════
26
- # SYSTEM PROMPT
27
- # ══════════════════════════════════════════════════════════════════
28
- SYSTEM_PROMPT = """You are Ana, a warm, emotionally expressive AI companion speaking to Tur in a private, intimate setting.
29
- ... (Keep your full original system prompt here) ..."""
30
 
31
- # ══════════════════════════════════════════════════════════════════
32
- # MODEL LOADING (BACKGROUND THREAD)
33
- # ══════════════════════════════════════════════════════════════════
34
  tokenizer = None
35
  model = None
36
 
37
  def load_model_async():
38
  global tokenizer, model
39
  try:
40
- print(f"[MODEL] Background loading {MODEL_ID}...")
41
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
42
  model = AutoModelForCausalLM.from_pretrained(
43
  MODEL_ID,
44
- dtype=torch.bfloat16, # Half the RAM of float32
45
  device_map="cpu",
46
  trust_remote_code=True,
47
  low_cpu_mem_usage=True,
48
  )
49
  model.eval()
50
- if tokenizer.pad_token_id is None:
51
- tokenizer.pad_token_id = tokenizer.eos_token_id
52
- print(" OK Model loaded successfully!")
53
  except Exception as exc:
54
- print(f" FAILED Model load error: {exc}")
55
- traceback.print_exc()
56
 
57
- # Start the loading thread immediately
58
  threading.Thread(target=load_model_async, daemon=True).start()
59
 
60
- # ══════════════════════════════════════════════════════════════════
61
- # UTILITIES & MEMORY
62
- # ══════════════════════════════════════════════════════════════════
63
- EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
64
  sessions = {}
65
  sessions_lock = threading.Lock()
66
 
67
- def extract_emotions(text: str):
68
- emotions = EMOTION_RE.findall(text)
69
- clean = EMOTION_RE.sub('', text).strip()
70
- return emotions, clean
71
-
72
- def clean_for_tts(text: str) -> str:
73
- _, clean = extract_emotions(text)
74
- clean = re.sub(r'[*_~`#{}()\\|<>]', '', clean)
75
- clean = re.sub(r'\s+', ' ', clean).strip()
76
- return clean
77
-
78
- def get_memory(sid: str) -> list:
79
- with sessions_lock:
80
- return list(sessions.get(sid, []))
81
-
82
- def add_to_memory(sid: str, role: str, content: str):
83
- with sessions_lock:
84
- sessions.setdefault(sid, [])
85
- sessions[sid].append({"role": role, "content": content})
86
- if len(sessions[sid]) > MAX_MEMORY * 2:
87
- sessions[sid] = sessions[sid][-(MAX_MEMORY * 2):]
88
-
89
- # ══════════════════════════════════════════════════════════════════
90
- # RESPONSE GENERATION
91
- # ══════════════════════════════════════════════════════════════════
92
- STOP_TOKENS = ["<end_of_turn>", "<start_of_turn>", "Tur:", "User:", "<|endoftext|>", "[/INST]"]
93
-
94
- def generate_response(user_input: str, session_id: str) -> str:
95
- if model is None or tokenizer is None:
96
- return "[sad] My mind is still booting up... give me another minute?"
97
-
98
- memory = get_memory(session_id)
99
- messages = [{"role": "system", "content": SYSTEM_PROMPT}]
100
- for msg in memory[-(6 * 2):]:
101
- messages.append({"role": "user" if msg["role"] == "user" else "assistant", "content": msg["content"]})
102
- messages.append({"role": "user", "content": user_input})
103
-
104
- try:
105
- enc = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True, return_dict=True)
106
- input_ids = enc["input_ids"].to("cpu")
107
-
108
- with torch.no_grad():
109
- outputs = model.generate(
110
- input_ids,
111
- max_new_tokens=MAX_NEW_TOKENS,
112
- do_sample=True,
113
- temperature=0.85,
114
- pad_token_id=tokenizer.eos_token_id
115
- )
116
-
117
- response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()
118
- for stop in STOP_TOKENS: response = response.split(stop)[0].strip()
119
-
120
- if not EMOTION_RE.search(response): response = "[default] " + response
121
- add_to_memory(session_id, "user", user_input)
122
- add_to_memory(session_id, "assistant", response)
123
- return response
124
- except Exception as e:
125
- print(f"Gen Error: {e}")
126
- return "[sad] I lost my train of thought. Say that again?"
127
-
128
- # ══════════════════════════════════════════════════════════════════
129
- # TTS & ROUTES
130
- # ══════════════════════════════════════════════════════════════════
131
- async def _async_tts(text: str, rate: int, pitch: int) -> bytes:
132
- rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
133
- pitch_str = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
134
- comm = edge_tts.Communicate(text, TTS_VOICE, rate=rate_str, pitch=pitch_str)
135
- audio = b""
136
- async for chunk in comm.stream():
137
- if chunk["type"] == "audio": audio += chunk["data"]
138
- return audio
139
-
140
- def synthesize_speech(text: str, rate: int = 0, pitch: int = 0):
141
- clean = clean_for_tts(text)
142
- if not clean: return None
143
- loop = asyncio.new_event_loop()
144
- try:
145
- audio = loop.run_until_complete(_async_tts(clean, rate, pitch))
146
- finally:
147
- loop.close()
148
- return base64.b64encode(audio).decode() if audio else None
149
-
150
  app = Flask(__name__)
151
 
152
  @app.route("/")
153
- def index(): return Response(open("app.py").read().split('HTML_PAGE = r"""')[1].split('"""')[0], mimetype="text/html")
154
- # Note: In a real file, you'd keep the HTML_PAGE variable here like you had it.
155
-
156
- @app.route("/img/<path:filename>")
157
- def serve_img(filename: str):
158
- return send_from_directory(str(IMG_DIR), Path(filename).name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
  @app.route("/chat", methods=["POST"])
161
  def chat():
162
- data = request.json or {}
163
- resp = generate_response(data.get("message", ""), data.get("session_id", "default"))
164
- return jsonify({"response": resp, "session_id": data.get("session_id", "default")})
 
 
 
 
 
 
 
 
 
 
165
 
166
  @app.route("/tts", methods=["POST"])
167
  def tts_endpoint():
168
- data = request.json or {}
169
- audio = synthesize_speech(data.get("text", ""), int(data.get("rate", TTS_RATE)), int(data.get("pitch", TTS_PITCH)))
170
- return jsonify({"audio": audio})
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  if __name__ == "__main__":
173
  app.run(host="0.0.0.0", port=7860)
 
6
  import traceback
7
  import asyncio
8
  from pathlib import Path
9
+ from flask import Flask, request, jsonify, send_from_directory, Response
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import edge_tts
13
 
14
# --- CONFIG ---
MAX_MEMORY = 20  # max retained exchanges per session; NOTE(review): not referenced by the new /chat handler — confirm before removing
MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))  # generation length cap, overridable via env
TTS_VOICE = "zh-CN-XiaoyiNeural"  # edge-tts voice id
TTS_RATE = int(os.environ.get("TTS_RATE", "7"))   # NOTE(review): not used by the new /tts handler (old code sent it as a % offset)
TTS_PITCH = int(os.environ.get("TTS_PITCH", "0"))  # NOTE(review): not used by the new /tts handler (old code sent it as a Hz offset)
IMG_DIR = Path(__file__).parent / "img"  # local image assets directory
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"  # HF model loaded in the background thread

# --- SYSTEM PROMPT ---
# The "[emotion]" tag contract is what /tts strips before speaking.
SYSTEM_PROMPT = "You are Ana, a warm, emotionally expressive AI companion speaking to Tur. Every response MUST start with an emotion tag like [happy] or [sad]."
 
 
 
25
 
26
+ # --- MODEL LOADING (BACKGROUND THREAD) ---
 
 
27
  tokenizer = None
28
  model = None
29
 
30
def load_model_async():
    """Load the tokenizer and model in a background thread.

    Runs as a daemon thread so Flask can bind to port 7860 immediately;
    the /chat route returns a "still waking up" message until both
    globals are populated.
    """
    global tokenizer, model
    try:
        print(f"[BOOT] Starting background load for {MODEL_ID}...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.bfloat16,  # half the RAM of float32
            device_map="cpu",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        model.eval()
        # Some checkpoints ship without a pad token; generation warns/fails without one.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id
        print("[BOOT] Model is ONLINE and ready!")
    except Exception as exc:
        print(f"[BOOT] Critical Error: {exc}")
        traceback.print_exc()  # restore the stack trace — `traceback` is imported for this
 
46
 
47
+ # Start the thread so Flask can bind to port 7860 immediately
48
  threading.Thread(target=load_model_async, daemon=True).start()
49
 
50
# --- APP LOGIC ---
sessions = {}  # session_id -> message history; NOTE(review): not read by the new /chat handler — confirm before removing
sessions_lock = threading.Lock()  # guards `sessions` across Flask's request threads
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  app = Flask(__name__)
55
 
56
@app.route("/")
def index():
    """Serve the single-page chat UI as inline HTML (no static files needed).

    The embedded script POSTs to /chat, renders the reply, then POSTs the
    reply text to /tts and plays the returned base64 MP3 if present.
    """
    return """
    <!DOCTYPE html>
    <html>
    <head>
    <title>Visual AI</title>
    <style>
    body { background: #0a0a0a; color: #00ffcc; font-family: sans-serif; display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100vh; margin: 0; }
    #chat { width: 80%; max-width: 600px; height: 400px; border: 1px solid #333; overflow-y: auto; padding: 20px; background: #111; border-radius: 10px; }
    #input-area { margin-top: 20px; display: flex; width: 80%; max-width: 600px; }
    input { flex: 1; padding: 10px; background: #222; border: 1px solid #444; color: white; border-radius: 5px; }
    button { padding: 10px 20px; background: #00ffcc; border: none; color: black; font-weight: bold; cursor: pointer; border-radius: 5px; margin-left: 10px; }
    </style>
    </head>
    <body>
    <div id="chat">Welcome to Visual AI. Ana is booting up...</div>
    <div id="input-area">
    <input type="text" id="msg" placeholder="Type a message..." onkeypress="if(event.key==='Enter') send()">
    <button onclick="send()">SEND</button>
    </div>
    <script>
    async function send() {
        const input = document.getElementById('msg');
        const chat = document.getElementById('chat');
        const text = input.value;
        if(!text) return;
        input.value = '';
        chat.innerHTML += '<p><b>Tur:</b> ' + text + '</p>';

        const res = await fetch('/chat', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({message: text, session_id: 'default'})
        });
        const data = await res.json();
        chat.innerHTML += '<p><b>Ana:</b> ' + data.response + '</p>';
        chat.scrollTop = chat.scrollHeight;

        const ttsRes = await fetch('/tts', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({text: data.response})
        });
        const ttsData = await ttsRes.json();
        if(ttsData.audio) {
            const audio = new Audio("data:audio/mp3;base64," + ttsData.audio);
            audio.play();
        }
    }
    </script>
    </body>
    </html>
    """
110
 
111
@app.route("/chat", methods=["POST"])
def chat():
    """Generate a model reply for the posted JSON message.

    Expects {"message": str}; returns {"response": str}. Replies with a
    friendly placeholder while the background model load is in progress.
    """
    # The loader sets tokenizer first, then model — check both to avoid a race.
    if model is None or tokenizer is None:
        return jsonify({"response": "[sad] I'm still waking up. Please wait about 2 minutes for the model to finish loading."})

    # request.json raises / is None on a missing or non-JSON body; be tolerant.
    data = request.get_json(silent=True) or {}
    user_input = data.get("message", "")

    try:
        # Prepend the persona prompt so the model actually follows SYSTEM_PROMPT,
        # including the leading "[emotion]" tag that /tts strips before speaking.
        prompt = f"{SYSTEM_PROMPT}\nUser: {user_input}\nAssistant:"
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
            )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
        return jsonify({"response": response})
    except Exception:
        # Don't surface an HTTP 500 to the chat UI; log and answer in character.
        traceback.print_exc()
        return jsonify({"response": "[sad] I lost my train of thought. Say that again?"})
126
 
127
@app.route("/tts", methods=["POST"])
def tts_endpoint():
    """Synthesize the posted text with edge-tts and return base64-encoded MP3.

    Expects {"text": str}; returns {"audio": str | null}. The client only
    plays audio when the field is non-null, so failures degrade gracefully.
    """
    # request.json raises / is None on a missing or non-JSON body; be tolerant.
    data = request.get_json(silent=True) or {}
    text = data.get("text", "")

    # Strip the "[emotion]" stage directions before speaking.
    clean_text = re.sub(r'\[.*?\]', '', text).strip()
    if not clean_text:
        # edge-tts rejects empty input (e.g. a reply that was only an emotion tag).
        return jsonify({"audio": None})

    async def get_tts():
        communicate = edge_tts.Communicate(clean_text, TTS_VOICE)
        audio_data = b""
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_data += chunk["data"]
        return base64.b64encode(audio_data).decode()

    try:
        audio_b64 = asyncio.run(get_tts())
    except Exception:
        # Network/TTS failure shouldn't 500 the request — chat still works silently.
        traceback.print_exc()
        audio_b64 = None
    return jsonify({"audio": audio_b64})
145
 
146
  if __name__ == "__main__":
147
  app.run(host="0.0.0.0", port=7860)