Ana

Paused

App Files Files Community

OrbitMC committed 17 days ago

Commit

a71972d

verified ·

1 Parent(s): 269c854

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -111

app.py CHANGED Viewed

@@ -7,27 +7,20 @@ import traceback
 import asyncio
 from pathlib import Path
 from flask import Flask, request, jsonify, send_from_directory, Response
-# GGUF Imports
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
 import edge_tts
 # ══════════════════════════════════════════════════════════════════
 # CONFIG
 # ══════════════════════════════════════════════════════════════════
-BASE_DIR       = Path(__file__).parent
-IMG_DIR        = BASE_DIR / "img"
 MAX_MEMORY     = 20
 MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
 TTS_VOICE      = "zh-CN-XiaoyiNeural"
 TTS_RATE       = int(os.environ.get("TTS_RATE",  "-4"))
 TTS_PITCH      = int(os.environ.get("TTS_PITCH", "7"))
-# UNSLOTH GGUF CONFIG
-REPO_ID        = "unsloth/Qwen2.5-3B-Instruct-GGUF"
-FILENAME       = "qwen2.5-3b-instruct-q4_k_m.gguf"
 # ══════════════════════════════════════════════════════════════════
 # SYSTEM PROMPT
@@ -58,6 +51,7 @@ TTS FORMATTING:
 # ══════════════════════════════════════════════════════════════════
 # EMOTION TAG UTILITIES
 # ══════════════════════════════════════════════════════════════════
 EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
 def extract_emotions(text: str):
@@ -73,30 +67,31 @@ def clean_for_tts(text: str) -> str:
     return clean
 # ══════════════════════════════════════════════════════════════════
-# MODEL LOADING (GGUF via llama.cpp)
 # ══════════════════════════════════════════════════════════════════
 print("=" * 60)
-print("  Visual AI -- Booting Systems (GGUF Fast Mode)")
 print("=" * 60)
-model = None
 try:
-    print(f"[MODEL] Downloading/Locating {FILENAME} ...")
-    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
-    print(f"[MODEL] Loading into RAM...")
-    # Auto-detect best thread count
-    threads = max(1, os.cpu_count() - 1) if os.cpu_count() else 4
-    model = Llama(
-        model_path=model_path,
-        n_ctx=2048,
-        n_threads=threads,
-        n_gpu_layers=0,       # Pure CPU inference (super fast for 3B/4B GGUF)
-        chat_format="chatml", # Qwen Native Format
-        verbose=False
     )
     print("  OK  Model loaded successfully!")
 except Exception as exc:
     print(f"  FAILED  Model load error: {exc}")
@@ -122,8 +117,13 @@ def add_to_memory(sid: str, role: str, content: str):
 # ══════════════════════════════════════════════════════════════════
 # RESPONSE GENERATION
 # ══════════════════════════════════════════════════════════════════
 def generate_response(user_input: str, session_id: str) -> str:
-    if model is None:
         return "[sad] My mind is offline right now. Please give me a moment."
     memory = get_memory(session_id)
@@ -137,22 +137,66 @@ def generate_response(user_input: str, session_id: str) -> str:
         })
     messages.append({"role": "user", "content": user_input})
     try:
-        outputs = model.create_chat_completion(
-            messages=messages,
-            max_tokens=MAX_NEW_TOKENS,
-            temperature=0.85,
-            top_k=40,
-            top_p=0.90,
-            repeat_penalty=1.1,
-            stop=["<|im_end|>", "<|im_start|>"]
         )
-        response = outputs["choices"][0]["message"]["content"].strip()
     except Exception as exc:
         print(f"[GENERATE] Error: {exc}")
         traceback.print_exc()
         return "[sad] Something went wrong in my mind. Could you say that again?"
     if not response or len(response) < 3:
         response = "[thinking] I lost my train of thought. Could you say that again?"
@@ -192,7 +236,7 @@ def synthesize_speech(text: str, rate: int = 0, pitch: int = 0):
     return base64.b64encode(audio).decode() if audio else None
 # ══════════════════════════════════════════════════════════════════
-# HTML -- Fast Loading, Crossfade Image Switch, Background Music
 # ══════════════════════════════════════════════════════════════════
 HTML_PAGE = r"""<!DOCTYPE html>
 <html lang="en">
@@ -221,18 +265,21 @@ body{
   inset:0;
   z-index:0;
   background:#000;
 }
-/* CROSSFADE IMAGE SETUP */
-.bg-img {
-  position:absolute;
-  inset:0;
   width:100%;
   height:100%;
   object-fit:contain;
   object-position:center center;
   display:block;
-  transition: opacity 0.6s ease-in-out;
 }
 #overlay{
@@ -362,8 +409,7 @@ body{
 <body>
 <div id="bg">
-  <img id="bgImgB" class="bg-img" style="opacity: 0; z-index: 1;">
-  <img id="bgImgA" class="bg-img" style="opacity: 1; z-index: 2;" src="/img/default.png" onerror="this.src='/img/default.png'">
 </div>
 <div id="overlay">
@@ -386,81 +432,48 @@ let busy = false, activeAudio = null;
 const MA = document.getElementById('msgArea');
 const MI = document.getElementById('msgIn');
 const SB = document.getElementById('sendBtn');
-// --- BACKGROUND MUSIC SYSTEM ---
-let musicStarted = false;
-const bgMusic = new Audio('/music.mp3');
-bgMusic.volume = 0.12; // 12% Volume
-bgMusic.loop = true;
-function tryStartMusic() {
-  if (musicStarted) return;
-  bgMusic.play().then(() => {
-    musicStarted = true;
-  }).catch(err => {
-    // Fails silently if music.mp3 is 404 or autoplay is blocked
-    console.log("Background music skipped or not found:", err);
-  });
-}
-document.body.addEventListener('click', tryStartMusic, {once:true});
-document.body.addEventListener('keydown', tryStartMusic, {once:true});
-// --- IMAGE CROSSFADE SYSTEM ---
 const availableImages = new Set();
-let activeBg = document.getElementById('bgImgA');
-let inactiveBg = document.getElementById('bgImgB');
 fetch('/api/images')
   .then(res => res.json())
   .then(files => {
     files.forEach(f => {
       const name = f.toLowerCase();
       availableImages.add(name);
       const img = new Image();
-      img.src = `/img/${name}.png`; // Preload
     });
   })
   .catch(err => console.warn('Could not load image list:', err));
-function smoothSwap(emotion) {
   const key = emotion.toLowerCase();
-  const targetSrc = availableImages.has(key) ? `/img/${key}.png` : '/img/default.png';
-  // Prevent crossfading to the image we are already looking at
-  if (activeBg.getAttribute('src') === targetSrc || activeBg.src.endsWith(targetSrc)) return;
-  // Load new image in the invisible layer behind
-  inactiveBg.src = targetSrc;
-  inactiveBg.onload = () => {
-    // Bring inactive to front, fade it in
-    inactiveBg.style.zIndex = "2";
-    inactiveBg.style.opacity = "1";
-    // Push active back, set it to fade out
-    activeBg.style.zIndex = "1";
-    activeBg.style.opacity = "0";
-    // Swap variables
-    let temp = activeBg;
-    activeBg = inactiveBg;
-    inactiveBg = temp;
-  };
 }
 function playImgSequence(emotions) {
-  if (!emotions || emotions.length === 0) { smoothSwap('default'); return; }
   const queue = [...emotions];
   (function next() {
     if (!queue.length) return;
-    smoothSwap(queue.shift());
-    // Give enough time for the 0.6s CSS transition to finish before next emotion
-    if (queue.length) setTimeout(next, 1200);
   })();
 }
-/* Parse emotion tags */
 function parseResponse(raw) {
   const tagRe = /\[([a-zA-Z_]+)\]/g;
   const emotions = [];
@@ -545,7 +558,6 @@ async function send() {
   }
   busy = false; SB.disabled = false;
-  MI.focus();
 }
 MI.addEventListener('keydown', e => {
@@ -564,15 +576,15 @@ app = Flask(__name__)
 def index():
     return Response(HTML_PAGE, mimetype="text/html")
-# API to send list of available images to frontend
 @app.route("/api/images")
 def api_images():
     if not IMG_DIR.exists():
         return jsonify([])
     files = [f.stem for f in IMG_DIR.glob("*.png")]
     return jsonify(files)
-# Serve PNG Images
 @app.route("/img/<path:filename>")
 def serve_img(filename: str):
     safe   = Path(filename).name
@@ -580,21 +592,13 @@ def serve_img(filename: str):
     if target.exists() and target.is_file():
         return send_from_directory(str(IMG_DIR), safe)
     fallback = IMG_DIR / "default.png"
     if fallback.exists() and fallback.is_file():
         return send_from_directory(str(IMG_DIR), "default.png")
     return Response("", status=404)
-# Serve Optional Background Music
-@app.route("/music.mp3")
-def serve_music():
-    music_file = BASE_DIR / "music.mp3"
-    if music_file.exists() and music_file.is_file():
-        return send_from_directory(str(BASE_DIR), "music.mp3")
-    return Response("Music file not found, skipping gracefully.", status=404)
-# Chat Endpoint
 @app.route("/chat", methods=["POST"])
 def chat():
     data       = request.json or {}
@@ -610,7 +614,6 @@ def chat():
         resp = "[sad] I encountered an unexpected error. Please try again."
     return jsonify({"response": resp, "session_id": session_id})
-# Voice TTS Endpoint
 @app.route("/tts", methods=["POST"])
 def tts_endpoint():
     data  = request.json or {}
@@ -633,7 +636,8 @@ def clear():
 @app.route("/health")
 def health():
     return jsonify({
-        "model_loaded": model is not None,
     })
 if __name__ == "__main__":

 import asyncio
 from pathlib import Path
 from flask import Flask, request, jsonify, send_from_directory, Response
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import edge_tts
 # ══════════════════════════════════════════════════════════════════
 # CONFIG
 # ══════════════════════════════════════════════════════════════════
 MAX_MEMORY     = 20
 MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
 TTS_VOICE      = "zh-CN-XiaoyiNeural"
 TTS_RATE       = int(os.environ.get("TTS_RATE",  "-4"))
 TTS_PITCH      = int(os.environ.get("TTS_PITCH", "7"))
+IMG_DIR        = Path(__file__).parent / "img"
+MODEL_ID       = "Qwen/Qwen2.5-1.5B-Instruct"
 # ══════════════════════════════════════════════════════════════════
 # SYSTEM PROMPT
 # ══════════════════════════════════════════════════════════════════
 # EMOTION TAG UTILITIES
 # ══════════════════════════════════════════════════════════════════
+# Now fully supports underscores (e.g. [lite_sad])
 EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
 def extract_emotions(text: str):
     return clean
 # ══════════════════════════════════════════════════════════════════
+# MODEL LOADING
 # ══════════════════════════════════════════════════════════════════
 print("=" * 60)
+print("  Visual AI -- Booting Systems")
 print("=" * 60)
+tokenizer = None
+model     = None
 try:
+    print(f"[MODEL] Loading {MODEL_ID} ...")
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_ID,
+        trust_remote_code=True,
+    )
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_ID,
+        dtype=torch.float32,
+        device_map="cpu",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
     )
+    model.eval()
+    if tokenizer.pad_token_id is None:
+        tokenizer.pad_token_id = tokenizer.eos_token_id
     print("  OK  Model loaded successfully!")
 except Exception as exc:
     print(f"  FAILED  Model load error: {exc}")
 # ══════════════════════════════════════════════════════════════════
 # RESPONSE GENERATION
 # ══════════════════════════════════════════════════════════════════
+STOP_TOKENS = [
+    "<end_of_turn>", "<start_of_turn>",
+    "Tur:", "User:", "<|endoftext|>", "[/INST]",
+]
 def generate_response(user_input: str, session_id: str) -> str:
+    if model is None or tokenizer is None:
         return "[sad] My mind is offline right now. Please give me a moment."
     memory = get_memory(session_id)
         })
     messages.append({"role": "user", "content": user_input})
+    input_ids      = None
+    attention_mask = None
     try:
+        enc = tokenizer.apply_chat_template(
+            messages,
+            return_tensors="pt",
+            add_generation_prompt=True,
+            return_dict=True,
         )
+        input_ids      = enc["input_ids"].to("cpu")
+        attention_mask = enc.get("attention_mask")
+        if attention_mask is not None:
+            attention_mask = attention_mask.to("cpu")
+    except Exception as e1:
+        print(f"[TOKENISE] chat_template failed ({e1}), using fallback")
+        try:
+            parts = [f"System: {SYSTEM_PROMPT}"]
+            for msg in recent:
+                label = "Tur" if msg["role"] == "user" else "Ana"
+                parts.append(f"{label}: {msg['content']}")
+            parts.append(f"Tur: {user_input}\nAna:")
+            enc            = tokenizer("\n".join(parts), return_tensors="pt")
+            input_ids      = enc["input_ids"].to("cpu")
+            attention_mask = enc.get("attention_mask")
+            if attention_mask is not None:
+                attention_mask = attention_mask.to("cpu")
+        except Exception as e2:
+            print(f"[TOKENISE] fallback failed: {e2}")
+            return "[sad] I could not process that. Please try again."
+    try:
+        gen_kwargs = dict(
+            max_new_tokens=MAX_NEW_TOKENS,
+            do_sample=True,
+            temperature=0.90,
+            top_k=50,
+            top_p=0.95,
+            repetition_penalty=1.1,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+        if attention_mask is not None:
+            gen_kwargs["attention_mask"] = attention_mask
+        with torch.no_grad():
+            outputs = model.generate(input_ids, **gen_kwargs)
     except Exception as exc:
         print(f"[GENERATE] Error: {exc}")
         traceback.print_exc()
         return "[sad] Something went wrong in my mind. Could you say that again?"
+    new_tokens = outputs[0][input_ids.shape[-1]:]
+    response   = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+    for stop in STOP_TOKENS:
+        if stop in response:
+            response = response.split(stop)[0].strip()
+    if "\n\n" in response:
+        response = response.split("\n\n")[0].strip()
     if not response or len(response) < 3:
         response = "[thinking] I lost my train of thought. Could you say that again?"
     return base64.b64encode(audio).decode() if audio else None
 # ══════════════════════════════════════════════════════════════════
+# HTML -- Fast Loading, Instant Swap, Contain Image View
 # ══════════════════════════════════════════════════════════════════
 HTML_PAGE = r"""<!DOCTYPE html>
 <html lang="en">
   inset:0;
   z-index:0;
   background:#000;
+  display:flex;
+  align-items:center;
+  justify-content:center;
 }
+/*
+  object-fit: contain prevents cuts/overflow and displays the full image intact.
+  No transitions = INSTANT image swapping.
+*/
+#bgImg{
   width:100%;
   height:100%;
   object-fit:contain;
   object-position:center center;
   display:block;
 }
 #overlay{
 <body>
 <div id="bg">
+  <img id="bgImg" src="/img/default.png" alt="" onerror="this.src='/img/default.png'">
 </div>
 <div id="overlay">
 const MA = document.getElementById('msgArea');
 const MI = document.getElementById('msgIn');
 const SB = document.getElementById('sendBtn');
+const BG = document.getElementById('bgImg');
+// Background Image Preloading System
 const availableImages = new Set();
+const imageCache = {};
+// 1. Fetch available images from the server and preload them into browser memory
 fetch('/api/images')
   .then(res => res.json())
   .then(files => {
     files.forEach(f => {
       const name = f.toLowerCase();
       availableImages.add(name);
       const img = new Image();
+      img.src = `/img/${name}.png`; // Pre-cache request
+      imageCache[name] = img;
     });
   })
   .catch(err => console.warn('Could not load image list:', err));
+// 2. Instant swap logic (No transition delays, loaded instantly from browser memory)
+function instantSwap(emotion) {
   const key = emotion.toLowerCase();
+  if (availableImages.has(key)) {
+    BG.src = `/img/${key}.png`;
+  } else {
+    BG.src = '/img/default.png'; // Fallback
+  }
 }
 function playImgSequence(emotions) {
+  if (!emotions || emotions.length === 0) { instantSwap('default'); return; }
   const queue = [...emotions];
   (function next() {
     if (!queue.length) return;
+    instantSwap(queue.shift());
+    if (queue.length) setTimeout(next, 750); // Pause briefly between multiple emotions
   })();
 }
+/* Parse emotion tags (Fully supports underscores) */
 function parseResponse(raw) {
   const tagRe = /\[([a-zA-Z_]+)\]/g;
   const emotions = [];
   }
   busy = false; SB.disabled = false;
 }
 MI.addEventListener('keydown', e => {
 def index():
     return Response(HTML_PAGE, mimetype="text/html")
+# Preload API for the frontend
 @app.route("/api/images")
 def api_images():
     if not IMG_DIR.exists():
         return jsonify([])
+    # Find all png files and return their filenames without extension
     files = [f.stem for f in IMG_DIR.glob("*.png")]
     return jsonify(files)
 @app.route("/img/<path:filename>")
 def serve_img(filename: str):
     safe   = Path(filename).name
     if target.exists() and target.is_file():
         return send_from_directory(str(IMG_DIR), safe)
+    # Safely fallback to default.png if specific image is missing server-side
     fallback = IMG_DIR / "default.png"
     if fallback.exists() and fallback.is_file():
         return send_from_directory(str(IMG_DIR), "default.png")
     return Response("", status=404)
 @app.route("/chat", methods=["POST"])
 def chat():
     data       = request.json or {}
         resp = "[sad] I encountered an unexpected error. Please try again."
     return jsonify({"response": resp, "session_id": session_id})
 @app.route("/tts", methods=["POST"])
 def tts_endpoint():
     data  = request.json or {}
 @app.route("/health")
 def health():
     return jsonify({
+        "model_loaded":     model is not None,
+        "tokenizer_loaded": tokenizer is not None,
     })
 if __name__ == "__main__":