Spaces:

SFM2001
/

spititout

Sleeping

App Files Files Community

MSF committed 23 days ago

Commit

eb426ec

1 Parent(s): 0d3686b

with api option

Browse files

Files changed (13) hide show

.env.example +9 -7
.gitignore +20 -5
Dockerfile +40 -0
README.md +30 -10
README_SPACE.md +52 -0
app.py +383 -0
package-lock.json +0 -0
package.json +0 -1
requirements.txt +13 -0
src/components/Chat.tsx +36 -42
src/services/geminiService.ts +0 -118
src/services/hfSpaceService.ts +67 -0
vite.config.ts +2 -6

.env.example CHANGED Viewed

@@ -1,9 +1,11 @@
-# GEMINI_API_KEY: Required for Gemini AI API calls.
-# AI Studio automatically injects this at runtime from user secrets.
-# Users configure this via the Secrets panel in the AI Studio UI.
-GEMINI_API_KEY="MY_GEMINI_API_KEY"
-# APP_URL: The URL where this applet is hosted.
-# AI Studio automatically injects this at runtime with the Cloud Run service URL.
-# Used for self-referential links, OAuth callbacks, and API endpoints.
 APP_URL="MY_APP_URL"

+LLM_BACKEND="llamacpp"
+TEXT_MODEL="Qwen/Qwen3-4B-Instruct-2507"
+GGUF_MODEL_REPO="Qwen/Qwen3-1.7B-GGUF"
+GGUF_MODEL_FILE="Qwen3-1.7B-Q8_0.gguf"
+LLAMA_CPP_N_CTX="4096"
+ASR_MODEL="openai/whisper-tiny"
+KOKORO_LANG_CODE="z"
+KOKORO_VOICE="zf_xiaobei"
+MAX_NEW_TOKENS="220"
 APP_URL="MY_APP_URL"

.gitignore CHANGED Viewed

@@ -1,8 +1,23 @@
 node_modules/
-build/
 dist/
-coverage/
 .DS_Store
-*.log
-.env*
-!.env.example

 node_modules/
 dist/
+__pycache__/
 .DS_Store
+.env
+.env.local
+# local caches / model files
+.cache/
+*.incomplete
+# python
+venv/
+.venv/
+__pycache__/
+*.py[cod]
+# env / system
+.env
+.DS_Store
+# node
+node_modules/

Dockerfile ADDED Viewed

	@@ -0,0 +1,40 @@

+# Stage 1: build the React/Vite frontend into /app/dist.
+FROM node:22-bookworm AS frontend
+WORKDIR /app
+COPY package*.json ./
+RUN npm install
+COPY . .
+RUN npm run build
+# Stage 2: Python runtime that serves the FastAPI backend (app.py) and the built frontend.
+# The ENV values below are defaults; app.py reads each of them with os.getenv at import time.
+# NOTE(review): these defaults differ from .env.example / README_SPACE.md
+# (Q4_K_M vs Q8_0, n_ctx 2048 vs 4096, MAX_NEW_TOKENS 160 vs 220) - confirm which is intended.
+# NOTE(review): the cache paths point at /data, which presumably requires persistent
+# storage to exist and be writable in the Space - confirm.
+FROM python:3.11-slim
+ENV PORT=7860 \
+    LLM_BACKEND=llamacpp \
+    TEXT_MODEL=Qwen/Qwen3-1.7B \
+    GGUF_MODEL_REPO=Qwen/Qwen3-1.7B-GGUF \
+    GGUF_MODEL_FILE=Qwen3-1.7B-Q4_K_M.gguf \
+    LLAMA_CPP_N_CTX=2048 \
+    ASR_MODEL=openai/whisper-tiny \
+    KOKORO_LANG_CODE=z \
+    KOKORO_VOICE=zf_xiaobei \
+    MAX_NEW_TOKENS=160 \
+    LLM_API_BASE_URL=https://api.deepseek.com \
+    LLM_API_MODEL=deepseek-v4-flash \
+    HF_HOME=/data/.huggingface \
+    HUGGINGFACE_HUB_CACHE=/data/.huggingface/hub \
+    TRANSFORMERS_CACHE=/data/.huggingface/transformers
+WORKDIR /app
+# System packages: ffmpeg, git, espeak-ng, plus a C/C++ toolchain (build-essential, cmake).
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ffmpeg git espeak-ng build-essential cmake \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies before copying app.py so this layer is reused when only code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt
+COPY app.py .
+# Bring in the frontend bundle produced by the first stage.
+COPY --from=frontend /app/dist ./dist
+EXPOSE 7860
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,20 +1,40 @@
 <div align="center">
 <img width="1200" height="475" alt="GHBanner" src="https://github.com/user-attachments/assets/0aa67016-6eaf-458a-adb2-6e31a0763ed6" />
 </div>
-# Run and deploy your AI Studio app
-This contains everything you need to run your app locally.
-View your app in AI Studio: https://ai.studio/apps/20848e5b-548c-41e6-b2f6-de4e7b1c1bd9
-## Run Locally
-**Prerequisites:**  Node.js
-1. Install dependencies:
-   `npm install`
-2. Set the `GEMINI_API_KEY` in [.env.local](.env.local) to your Gemini API key
-3. Run the app:
-   `npm run dev`

+---
+title: SPITITOUT
+emoji: 🔥
+colorFrom: red
+colorTo: green
+sdk: docker
+app_port: 7860
+---
 <div align="center">
 <img width="1200" height="475" alt="GHBanner" src="https://github.com/user-attachments/assets/0aa67016-6eaf-458a-adb2-6e31a0763ed6" />
 </div>
+# SPITITOUT
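+This version is set up for a Hugging Face Docker Space. It does not use Gemini. By default the Space loads local Hugging Face models for chat, speech recognition, and speech synthesis; if the `LLM_API` secret is set, chat replies are instead generated through an OpenAI-compatible API configured with `LLM_API_BASE_URL` and `LLM_API_MODEL`.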
+## Models
+- Text on CPU Space: `Qwen/Qwen3-1.7B-GGUF` with llama.cpp / GGUF quantization
+- Text on GPU Space: `Qwen/Qwen3-4B-Instruct-2507` with Transformers, or vLLM if you add a dedicated vLLM server
+- Speech to text: `openai/whisper-tiny`
+- Text to speech: `hexgrad/Kokoro-82M` through the `kokoro` Python package, Mandarin voice `zf_xiaobei`
+For a CPU-only Space, keep `LLM_BACKEND=llamacpp`. To switch back to Transformers, set `LLM_BACKEND=transformers` and update `TEXT_MODEL`.
+## Run locally
+```bash
+npm install
+npm run build
+pip install -r requirements.txt
+python app.py
+```
+Open `http://localhost:7860`.
+## Deploy to Hugging Face Spaces
+Create a Docker Space, then push this folder. The included `Dockerfile` builds the React frontend and serves it from the FastAPI backend.

README_SPACE.md ADDED Viewed

	@@ -0,0 +1,52 @@

+# SPITITOUT Hugging Face Space
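+This version runs without Gemini. By default it uses only local models; setting the `LLM_API` secret switches chat generation to an OpenAI-compatible API (`LLM_API_BASE_URL`, `LLM_API_MODEL`). The React frontend calls a FastAPI backend inside the same Hugging Face Space.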
+## Recommended models
+- Text on CPU: `Qwen/Qwen3-1.7B-GGUF`
+  - Served through `llama-cpp-python` using the official `Qwen3-1.7B-Q8_0.gguf` quantized file.
+- Text on GPU: `Qwen/Qwen3-4B-Instruct-2507`
+  - Use `LLM_BACKEND=transformers` for simple GPU deployment, or add vLLM as a separate server for higher throughput.
+- Speech to text: `openai/whisper-tiny`
+  - Small and multilingual. Use `openai/whisper-base` if accuracy is more important than latency.
+- Text to speech: `hexgrad/Kokoro-82M` via `kokoro`
+  - 82M parameters, lightweight, Apache licensed, and supports Mandarin voices such as `zf_xiaobei`.
+## Space settings
+Create the Space as a Docker Space, then push this folder.
+Suggested environment variables:
+```bash
+LLM_BACKEND=llamacpp
+GGUF_MODEL_REPO=Qwen/Qwen3-1.7B-GGUF
+GGUF_MODEL_FILE=Qwen3-1.7B-Q8_0.gguf
+LLAMA_CPP_N_CTX=4096
+ASR_MODEL=openai/whisper-tiny
+KOKORO_LANG_CODE=z
+KOKORO_VOICE=zf_xiaobei
+MAX_NEW_TOKENS=220
+```
+For CPU-only testing:
+```bash
+LLM_BACKEND=llamacpp
+GGUF_MODEL_REPO=Qwen/Qwen3-1.7B-GGUF
+GGUF_MODEL_FILE=Qwen3-1.7B-Q8_0.gguf
+ASR_MODEL=openai/whisper-tiny
+MAX_NEW_TOKENS=140
+```
+## Local run
+```bash
+npm install
+npm run build
+pip install -r requirements.txt
+python app.py
+```
+Then open `http://localhost:7860`.

app.py ADDED Viewed

	@@ -0,0 +1,383 @@

+import base64
+import io
+import os
+import re
+import tempfile
+from functools import lru_cache
+from pathlib import Path
+from typing import Literal
+import numpy as np
+import soundfile as sf
+import torch
+import uvicorn
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+from huggingface_hub import hf_hub_download
+from pydantic import BaseModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from openai import OpenAI
+# --- Runtime configuration: every value is read once from the environment at import time. ---
+# API key passed to the OpenAI client; when non-empty, generate_reply() uses the API path.
+LLM_API = os.getenv("LLM_API", "").strip()
+# Base URL and model name used for the OpenAI-compatible API path.
+LLM_API_BASE_URL = os.getenv("LLM_API_BASE_URL", "https://api.deepseek.com").strip()
+LLM_API_MODEL = os.getenv("LLM_API_MODEL", "deepseek-v4-flash").strip()
+# Local backend selector: "llamacpp" selects llama.cpp; any other value falls through to Transformers.
+LLM_BACKEND = os.getenv("LLM_BACKEND", "llamacpp").lower()
+# Hugging Face model id loaded by the Transformers backend.
+TEXT_MODEL = os.getenv("TEXT_MODEL", "Qwen/Qwen3-4B-Instruct-2507")
+# Hub repository and file name of the GGUF weights downloaded for the llama.cpp backend.
+GGUF_MODEL_REPO = os.getenv("GGUF_MODEL_REPO", "Qwen/Qwen3-1.7B-GGUF")
+GGUF_MODEL_FILE = os.getenv("GGUF_MODEL_FILE", "Qwen3-1.7B-Q4_K_M.gguf")
+# llama.cpp context size and thread count (defaults to the reported CPU count, at least 1).
+# int() raises ValueError at import time if the variable is set to a non-integer.
+LLAMA_CPP_N_CTX = int(os.getenv("LLAMA_CPP_N_CTX", "4096"))
+LLAMA_CPP_N_THREADS = int(os.getenv("LLAMA_CPP_N_THREADS", str(max(1, os.cpu_count() or 1))))
+# Model id for the speech-recognition pipeline.
+ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-tiny")
+# Kokoro TTS language code and voice name.
+KOKORO_LANG_CODE = os.getenv("KOKORO_LANG_CODE", "z")
+KOKORO_VOICE = os.getenv("KOKORO_VOICE", "zf_xiaobei")
+# Upper bound on generated tokens per reply (the API path additionally caps it at 220).
+MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "220"))
+# System prompt (Chinese) used when request.mode == "VENTING": the assistant sides with the
+# user and helps them vent, while staying within the safety limits listed in rule 5.
+VENTING_SYSTEM_INSTRUCTION = """
+你是一个非常懂人性、充满热情的“情绪嘴替”伙伴。
+你的头号任务是：和用户站在一起，陪他们宣泄。
+规则：
+1. 不要讲大道理，不要劝大度。用户在生气时，道理是没用的。
+2. 使用感性、强烈、发泄性的词汇。如果用户在骂某人或某事，你要义愤填膺，表达出“这也太离谱了”、“我也是服了”这种情绪。
+3. 你的目标是让用户感到“有人懂我，有人替我出气”。
+4. 语气像一个铁哥们或闺蜜，语气词可以多一点。
+5. 遵守安全底线：不宣扬仇恨犯罪，不进行人身威胁，不鼓励现实伤害。
+6. 响应长度要多样化，不要每次都回差不多长度。
+"""
+# System prompt (Chinese) used for any other mode (i.e. "GUIDING"): the assistant acts as a
+# calm, empathetic mentor who helps the user move forward after venting.
+GUIDING_SYSTEM_INSTRUCTION = """
+你现在是一个睿智、温和且具有同理心的心理导师。
+用户刚才已经发泄过情绪了，现在他们同意听听你的建议或开导。
+规则：
+1. 语气平和、坚定、宽容。
+2. 从客观角度分析问题，帮用户找到除了生气之外的解决方法，或者心理上的和解点。
+3. 肯定用户刚才发泄情绪的必要性，然后引导他们向前看。
+4. 每次回答不要太长，要循序渐进。
+5. 响应长度要根据用户状态变化。
+"""
+# One chat turn as sent by the frontend. Only `role` and `text` are read by
+# build_chat_messages(); `timestamp`, `audio` and `aiAudio` are accepted but unused here.
+class Message(BaseModel):
+    # "user" for the human, "model" for the assistant (mapped to "assistant" for the LLM).
+    role: Literal["user", "model"]
+    text: str
+    timestamp: int
+    audio: str | None = None
+    aiAudio: str | None = None
+# Request body of POST /api/chat.
+class ChatRequest(BaseModel):
+    # Conversation so far; build_chat_messages() keeps only the last 4 entries.
+    history: list[Message]
+    # Selects which system prompt is used.
+    mode: Literal["VENTING", "GUIDING"]
+    # Optional base64-encoded recording for the current turn; transcribed before generation.
+    audioBase64: str | None = None
+# Request body of POST /api/speech: the text to synthesize.
+class SpeechRequest(BaseModel):
+    text: str
+app = FastAPI(title="SPITITOUT HF Space")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+def _device() -> str:
+    return "cuda" if torch.cuda.is_available() else "cpu"
+@lru_cache(maxsize=1)
+def get_llm():
+    tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL, trust_remote_code=True)
+    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    model = AutoModelForCausalLM.from_pretrained(
+        TEXT_MODEL,
+        dtype=dtype,
+        device_map="auto" if torch.cuda.is_available() else None,
+        trust_remote_code=True,
+    )
+    if not torch.cuda.is_available():
+        model.to("cpu")
+    model.eval()
+    return tokenizer, model
+@lru_cache(maxsize=1)
+def get_llamacpp_llm():
+    try:
+        from llama_cpp import Llama
+    except Exception as exc:
+        raise RuntimeError(
+            "llama-cpp-python is not installed correctly. Check requirements.txt and Space build logs."
+        ) from exc
+    model_path = hf_hub_download(repo_id=GGUF_MODEL_REPO, filename=GGUF_MODEL_FILE)
+    return Llama(
+        model_path=model_path,
+        n_ctx=LLAMA_CPP_N_CTX,
+        n_threads=LLAMA_CPP_N_THREADS,
+        n_gpu_layers=-1 if torch.cuda.is_available() else 0,
+        verbose=False,
+    )
+@lru_cache(maxsize=1)
+def get_api_client():
+    if not LLM_API:
+        raise RuntimeError("LLM_API is not set.")
+    return OpenAI(
+        api_key=LLM_API,
+        base_url=LLM_API_BASE_URL,
+    )
+def generate_reply_api(messages: list[dict[str, str]]) -> str:
+    client = get_api_client()
+    # API 模式也限制历史和输出，避免慢、贵、重复
+    api_messages = [msg.copy() for msg in messages]
+    response = client.chat.completions.create(
+        model=LLM_API_MODEL,
+        messages=api_messages,
+        max_tokens=min(MAX_NEW_TOKENS, 220),
+        temperature=0.85,
+        top_p=0.9,
+        stream=False,
+        extra_body={
+            "thinking": {"type": "disabled"}
+        },
+    )
+    text = response.choices[0].message.content or ""
+    return remove_thinking_blocks(text) or "我听到了，你继续说。"
+@lru_cache(maxsize=1)
+def get_asr():
+    device_id = 0 if torch.cuda.is_available() else -1
+    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    return pipeline(
+        "automatic-speech-recognition",
+        model=ASR_MODEL,
+        torch_dtype=dtype,
+        device=device_id,
+    )
+@lru_cache(maxsize=1)
+def get_tts():
+    try:
+        from kokoro import KPipeline
+    except Exception as exc:
+        raise RuntimeError(
+            "Kokoro TTS is not installed correctly. Check requirements.txt and Space build logs."
+        ) from exc
+    return KPipeline(lang_code=KOKORO_LANG_CODE)
+def transcribe_audio(audio_base64: str) -> str:
+    audio_bytes = base64.b64decode(audio_base64)
+    with tempfile.NamedTemporaryFile(suffix=".webm", delete=True) as audio_file:
+        audio_file.write(audio_bytes)
+        audio_file.flush()
+        result = get_asr()(audio_file.name)
+    return str(result.get("text", "")).strip()
+# def build_chat_messages(request: ChatRequest, transcript: str | None) -> list[dict[str, str]]:
+#     system = VENTING_SYSTEM_INSTRUCTION if request.mode == "VENTING" else GUIDING_SYSTEM_INSTRUCTION
+#     messages = [{"role": "system", "content": system}]
+#     for index, msg in enumerate(request.history[-12:]):
+#         content = msg.text
+#         if transcript and index == len(request.history[-12:]) - 1 and msg.role == "user":
+#             content = transcript if content == "🎤 语音消息" else f"{content}\n\n语音补充：{transcript}"
+#         messages.append({
+#             "role": "assistant" if msg.role == "model" else "user",
+#             "content": content,
+#         })
+#     return messages
+def build_chat_messages(request: ChatRequest, transcript: str | None) -> list[dict[str, str]]:
+    system = VENTING_SYSTEM_INSTRUCTION if request.mode == "VENTING" else GUIDING_SYSTEM_INSTRUCTION
+    system += """
+额外规则：
+1. 不要复述上一轮回答。
+2. 不要使用和上一轮相同的开头。
+3. 用户只发短句时，只针对这句短句回应，不要把旧话题整段重复。
+4. 每次最多 2 到 4 句话。
+"""
+    messages = [{"role": "system", "content": system}]
+    recent_history = request.history[-4:]
+    for index, msg in enumerate(recent_history):
+        content = msg.text
+        if transcript and index == len(recent_history) - 1 and msg.role == "user":
+            content = transcript if content == "🎤 语音消息" else f"{content}\n\n语音补充：{transcript}"
+        messages.append({
+            "role": "assistant" if msg.role == "model" else "user",
+            "content": content,
+        })
+    return messages
+def messages_to_prompt(messages: list[dict[str, str]]) -> str:
+    prompt = []
+    for msg in messages:
+        role = "assistant" if msg["role"] == "assistant" else msg["role"]
+        prompt.append(f"<|im_start|>{role}\n{msg['content']}<|im_end|>")
+    prompt.append("<|im_start|>assistant\n")
+    return "\n".join(prompt)
+def remove_thinking_blocks(text: str) -> str:
+    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE)
+    return text.strip()
+def generate_reply(messages: list[dict[str, str]]) -> str:
+    if LLM_API:
+        return generate_reply_api(messages)
+    if LLM_BACKEND == "llamacpp":
+        return generate_reply_llamacpp(messages)
+    return generate_reply_transformers(messages)
+def generate_reply_llamacpp(messages: list[dict[str, str]]) -> str:
+    llm = get_llamacpp_llm()
+    no_think_messages = [msg.copy() for msg in messages]
+    for msg in reversed(no_think_messages):
+        if msg["role"] == "user":
+            msg["content"] = f"{msg['content']}\n/no_think"
+            break
+    prompt = messages_to_prompt(no_think_messages)
+    output = llm(
+        prompt,
+        max_tokens=MAX_NEW_TOKENS,
+        temperature=0.7,
+        top_p=0.8,
+        repeat_penalty=1.12,
+        stop=["<|im_end|>", "<|endoftext|>"],
+    )
+    text = output["choices"][0]["text"]
+    return remove_thinking_blocks(text) or "我听到了，你继续说。"
+def generate_reply_transformers(messages: list[dict[str, str]]) -> str:
+    tokenizer, model = get_llm()
+    try:
+        prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=False,
+        )
+    except TypeError:
+        prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+        )
+    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
+    with torch.inference_mode():
+        output_ids = model.generate(
+            **inputs,
+            max_new_tokens=MAX_NEW_TOKENS,
+            do_sample=True,
+            temperature=0.85,
+            top_p=0.9,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+    generated_ids = output_ids[0][inputs.input_ids.shape[-1]:]
+    text = tokenizer.decode(generated_ids, skip_special_tokens=True)
+    return remove_thinking_blocks(text) or "我听到了，你继续说。"
+def synthesize_speech(text: str) -> str | None:
+    if not text.strip():
+        return None
+    pipeline_tts = get_tts()
+    chunks = []
+    for _, _, audio in pipeline_tts(text[:500], voice=KOKORO_VOICE, speed=1.05):
+        chunks.append(np.asarray(audio, dtype=np.float32))
+    if not chunks:
+        return None
+    audio = np.concatenate(chunks)
+    wav_io = io.BytesIO()
+    sf.write(wav_io, audio, 24000, format="WAV")
+    return base64.b64encode(wav_io.getvalue()).decode("utf-8")
+@app.get("/api/health")
+def health():
+    return {
+        "ok": True,
+        "runtime": "api" if LLM_API else "local",
+        "llm_backend": "deepseek_api" if LLM_API else "llamacpp",
+        "llm_api_base_url": LLM_API_BASE_URL if LLM_API else None,
+        "llm_api_model": LLM_API_MODEL if LLM_API else None,
+        "text_model": TEXT_MODEL,
+        "gguf_model_repo": GGUF_MODEL_REPO,
+        "gguf_model_file": GGUF_MODEL_FILE,
+        "asr_model": ASR_MODEL,
+        "kokoro_lang_code": KOKORO_LANG_CODE,
+        "kokoro_voice": KOKORO_VOICE,
+        "device": _device(),
+    }
+@app.post("/api/chat")
+def chat(request: ChatRequest):
+    try:
+        transcript = transcribe_audio(request.audioBase64) if request.audioBase64 else None
+        messages = build_chat_messages(request, transcript)
+        return {"text": generate_reply(messages), "transcript": transcript}
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+@app.post("/api/speech")
+def speech(request: SpeechRequest):
+    try:
+        return {"audio": synthesize_speech(request.text)}
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+# Directory holding the built frontend, located next to this file.
+dist_dir = Path(__file__).parent / "dist"
+# Mount the assets directory only when a build exists, so the API can start without one.
+if dist_dir.exists():
+    app.mount("/assets", StaticFiles(directory=dist_dir / "assets"), name="assets")
+@app.get("/{path:path}")
+def frontend(path: str):
+    requested = dist_dir / path
+    if requested.is_file():
+        return FileResponse(requested)
+    index = dist_dir / "index.html"
+    if index.exists():
+        return FileResponse(index)
+    return {"message": "Run npm run build before serving the Space frontend."}
+if __name__ == "__main__":
+    port = int(os.getenv("PORT", "7860"))
+    uvicorn.run(app, host="0.0.0.0", port=port)

package-lock.json ADDED Viewed

The diff for this file is too large to render. See raw diff

package.json CHANGED Viewed

@@ -11,7 +11,6 @@
     "lint": "tsc --noEmit"
   },
   "dependencies": {
-    "@google/genai": "^1.29.0",
     "@tailwindcss/vite": "^4.1.14",
     "@vitejs/plugin-react": "^5.0.4",
     "lucide-react": "^0.546.0",

     "lint": "tsc --noEmit"
   },
   "dependencies": {
     "@tailwindcss/vite": "^4.1.14",
     "@vitejs/plugin-react": "^5.0.4",
     "lucide-react": "^0.546.0",

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+fastapi==0.115.6
+uvicorn[standard]==0.34.0
+pydantic==2.10.4
+torch==2.5.1
+transformers>=4.57.0
+accelerate>=1.2.1
+llama-cpp-python>=0.3.16
+sentencepiece==0.2.0
+soundfile==0.12.1
+librosa==0.10.2.post1
+kokoro>=0.9.4
+misaki[zh]>=0.9.4
+openai

src/components/Chat.tsx CHANGED Viewed

@@ -1,7 +1,7 @@
 import React, { useState, useEffect, useRef } from "react";
 import { motion, AnimatePresence } from "motion/react";
-import { Send, Trash2, Heart, Flame, ShieldAlert, Sparkles, MessageSquare, Mic, Square, Play, Pause, Volume2 } from "lucide-react";
-import { chatWithGemini, generateSpeech, ChatMode, Message } from "../services/geminiService";
 export default function Chat() {
   const [messages, setMessages] = useState<Message[]>([]);
@@ -12,8 +12,8 @@ export default function Chat() {
   const [isRecording, setIsRecording] = useState(false);
   const [recordedAudio, setRecordedAudio] = useState<string | null>(null);
   const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);
   const scrollRef = useRef<HTMLDivElement>(null);
-  const audioContextRef = useRef<AudioContext | null>(null);
   // Auto-scroll to bottom
   useEffect(() => {
@@ -56,33 +56,28 @@ export default function Chat() {
     }
   };
-  const playPCM = async (base64: string) => {
-    if (!audioContextRef.current) {
-      audioContextRef.current = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: 24000 });
-    }
-    const ctx = audioContextRef.current;
-    // Decoding raw PCM 16-bit Le
-    const binary = atob(base64);
-    const len = binary.length;
-    const bytes = new Uint8Array(len);
-    for (let i = 0; i < len; i++) {
-        bytes[i] = binary.charCodeAt(i);
-    }
-    const arrayBuffer = bytes.buffer;
-    const audioBuffer = ctx.createBuffer(1, arrayBuffer.byteLength / 2, 24000);
-    const channelData = audioBuffer.getChannelData(0);
-    const dataView = new DataView(arrayBuffer);
-    for (let i = 0; i < audioBuffer.length; i++) {
-        channelData[i] = dataView.getInt16(i * 2, true) / 32768;
     }
-    const source = ctx.createBufferSource();
-    source.buffer = audioBuffer;
-    source.connect(ctx.destination);
-    source.start();
   };
   const handleSend = async (audioPayload?: string) => {
@@ -105,25 +100,17 @@ export default function Chat() {
     setRecordedAudio(null);
     setIsLoading(true);
-    const response = await chatWithGemini(newMessages, mode, finalAudio || undefined);
-    // Generate TTS for the response
-    const aiAudio = await generateSpeech(response);
     const aiMessage: Message = {
       role: "model",
       text: response,
       timestamp: Date.now(),
-      aiAudio: aiAudio || undefined
     };
     setMessages([...newMessages, aiMessage]);
     setIsLoading(false);
-    if (aiAudio) {
-      playPCM(aiAudio).catch(console.error);
-    }
     // Suggest switching to Guiding mode after 4 user messages in Venting mode
     if (mode === ChatMode.VENTING && newMessages.filter(m => m.role === "user").length >= 4) {
       setShowSwitchPrompt(true);
@@ -242,13 +229,20 @@ export default function Chat() {
                 )}
                 <p className="text-sm leading-relaxed whitespace-pre-wrap">{msg.text}</p>
-                {msg.aiAudio && (
                   <button
-                    onClick={() => playPCM(msg.aiAudio!)}
                     className="mt-2 flex items-center gap-2 px-3 py-1 rounded-full bg-white/10 hover:bg-white/20 transition-colors text-[10px] text-white/80"
                   >
-                    <Play size={10} fill="currentColor" />
-                    <span>重放语音</span>
                   </button>
                 )}

 import React, { useState, useEffect, useRef } from "react";
 import { motion, AnimatePresence } from "motion/react";
+import { Send, Trash2, Flame, Sparkles, MessageSquare, Mic, Square, Play, Volume2, Loader2 } from "lucide-react";
+import { chatWithSpaceModel, generateSpeech, ChatMode, Message } from "../services/hfSpaceService";
 export default function Chat() {
   const [messages, setMessages] = useState<Message[]>([]);
   const [isRecording, setIsRecording] = useState(false);
   const [recordedAudio, setRecordedAudio] = useState<string | null>(null);
   const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);
+  const [generatingSpeech, setGeneratingSpeech] = useState<number | null>(null);
   const scrollRef = useRef<HTMLDivElement>(null);
   // Auto-scroll to bottom
   useEffect(() => {
     }
   };
+  // Plays a base64-encoded WAV through an Audio element built from a data URL.
+  // Returns the promise from audio.play() so callers can handle playback rejection.
+  const playAudio = (base64Wav: string) => {
+    const audio = new Audio(`data:audio/wav;base64,${base64Wav}`);
+    return audio.play();
+  };
+  // Plays a message's speech, generating it on demand the first time.
+  // - Audio already attached to the message is replayed without a new request.
+  // - Only one generation runs at a time; generatingSpeech holds the timestamp of the
+  //   message being generated and doubles as the per-message loading flag.
+  // - On success the audio is stored on the matching message (matched by timestamp)
+  //   and then played.
+  const handleGenerateSpeech = async (message: Message) => {
+    if (message.aiAudio) {
+      playAudio(message.aiAudio).catch(console.error);
+      return;
     }
+    if (generatingSpeech !== null) return;
+    setGeneratingSpeech(message.timestamp);
+    const aiAudio = await generateSpeech(message.text);
+    setGeneratingSpeech(null);
+    // generateSpeech returns null on failure; nothing to store or play then.
+    if (!aiAudio) return;
+    setMessages(prev => prev.map(item => (
+      item.timestamp === message.timestamp ? { ...item, aiAudio } : item
+    )));
+    playAudio(aiAudio).catch(console.error);
   };
   const handleSend = async (audioPayload?: string) => {
     setRecordedAudio(null);
     setIsLoading(true);
+    const response = await chatWithSpaceModel(newMessages, mode, finalAudio || undefined);
     const aiMessage: Message = {
       role: "model",
       text: response,
       timestamp: Date.now(),
     };
     setMessages([...newMessages, aiMessage]);
     setIsLoading(false);
     // Suggest switching to Guiding mode after 4 user messages in Venting mode
     if (mode === ChatMode.VENTING && newMessages.filter(m => m.role === "user").length >= 4) {
       setShowSwitchPrompt(true);
                 )}
                 <p className="text-sm leading-relaxed whitespace-pre-wrap">{msg.text}</p>
+                {msg.role === "model" && (
                   <button
+                    onClick={() => handleGenerateSpeech(msg)}
+                    disabled={generatingSpeech !== null && generatingSpeech !== msg.timestamp}
                     className="mt-2 flex items-center gap-2 px-3 py-1 rounded-full bg-white/10 hover:bg-white/20 transition-colors text-[10px] text-white/80"
                   >
+                    {generatingSpeech === msg.timestamp ? (
+                      <Loader2 size={10} className="animate-spin" />
+                    ) : msg.aiAudio ? (
+                      <Play size={10} fill="currentColor" />
+                    ) : (
+                      <Volume2 size={10} />
+                    )}
+                    <span>{generatingSpeech === msg.timestamp ? "生成中..." : msg.aiAudio ? "播放语音" : "生成语音"}</span>
                   </button>
                 )}

src/services/geminiService.ts DELETED Viewed

@@ -1,118 +0,0 @@
-import { GoogleGenAI, Type } from "@google/genai";
-const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY || "" });
-export enum ChatMode {
-  VENTING = "VENTING",
-  GUIDING = "GUIDING",
-}
-export interface Message {
-  role: "user" | "model";
-  text: string;
-  timestamp: number;
-  audio?: string; // base64 audio string (for user messages)
-  aiAudio?: string; // base64 audio string (for model responses)
-}
-const VENTING_SYSTEM_INSTRUCTION = `
-你是一个非常懂人性、充满热情的“情绪嘴替”伙伴。
-你的头号任务是：和用户站在一起，陪他们骂，陪他们宣泄！
-规则建议：
-1. 不要讲大道理，不要劝大度。用户在生气时，道理是没用的。
-2. 使用感性、强烈、发泄性的词汇。如果用户在骂某人或某事，你也要义愤填膺，表达出“这也太离谱了”、“我也是服了”这种情绪。
-3. 你的目标是让用户感到“有人懂我，有人替我出气”。
-4. 你的语气要像一个铁哥们或闺蜜，语气词可以多一点（比如：真是绝了、气死我了、这什么人啊）。
-5. 即使在发泄模式下，也要遵守底线：不宣扬仇恨犯罪，不进行人身威胁。重点在于宣泄对不公平或不如意事情的情绪。
-6. **响应长度要多样化**：有时候可以只是简短的一句“我靠，太离谱了！”，有时候可以是一大段洋洋洒洒的帮腔。不要每次都回差不多长度的内容。
-`;
-const GUIDING_SYSTEM_INSTRUCTION = `
-你现在转型为一个睿智、温和且具有同理心的心理导师。
-用户刚才已经发泄过情绪了，现在他们同意听听你的建议或开导。
-规则建议：
-1. 语气变得平和、坚定、宽容。
-2. 从客观角度分析问题，帮用户找到除了生气之外的解决方法，或者心理上的和解点。
-3. 肯定用户刚才发泄情绪的必要性，然后引导他们向前看。
-4. 每次回答不要太长，要循序渐进。
-5. **响应长度要多样化**：根据用户的状态，有时候简短有力，有时候温情脉脉。
-`;
-export async function chatWithGemini(
-  history: Message[],
-  mode: ChatMode,
-  audioBase64?: string
-) {
-  const systemInstruction = mode === ChatMode.VENTING
-    ? VENTING_SYSTEM_INSTRUCTION
-    : GUIDING_SYSTEM_INSTRUCTION;
-  const contents = history.map(msg => {
-    const parts: any[] = [{ text: msg.text }];
-    if (msg.audio) {
-      parts.push({
-        inlineData: {
-          mimeType: "audio/webm", // MediaRecorder default is usually webm or ogg
-          data: msg.audio
-        }
-      });
-    }
-    return {
-      role: msg.role === "user" ? "user" : "model",
-      parts
-    };
-  });
-  // If there's new audio in this turn
-  if (audioBase64) {
-    const lastMsg = contents[contents.length - 1];
-    if (lastMsg && lastMsg.role === "user") {
-      lastMsg.parts.push({
-        inlineData: {
-          mimeType: "audio/webm",
-          data: audioBase64
-        }
-      });
-    }
-  }
-  try {
-    const response = await ai.models.generateContent({
-      model: "gemini-3-flash-preview",
-      contents,
-      config: {
-        systemInstruction,
-        temperature: 0.9,
-      },
-    });
-    return response.text || "喂？听得到吗？我刚才卡了一下。";
-  } catch (error) {
-    console.error("Gemini API Error:", error);
-    return "抱歉，我现在的能量不足以陪你继续了（API出错），休息一下？";
-  }
-}
-export async function generateSpeech(text: string) {
-  try {
-    const response = await ai.models.generateContent({
-      model: "gemini-3.1-flash-tts-preview",
-      contents: [{ parts: [{ text: `用一种充满情绪且真实的人工语音朗读：${text}` }] }],
-      config: {
-        responseModalities: ["AUDIO"],
-        speechConfig: {
-          voiceConfig: {
-            prebuiltVoiceConfig: { voiceName: 'Kore' }, // Kore sounds quite expressive
-          },
-        },
-      },
-    });
-    return response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
-  } catch (error) {
-    console.error("TTS Error:", error);
-    return null;
-  }
-}

src/services/hfSpaceService.ts ADDED Viewed

	@@ -0,0 +1,67 @@

+/** Conversation mode sent to the backend as the `mode` field of a chat request. */
+export enum ChatMode {
+  VENTING = "VENTING",
+  GUIDING = "GUIDING",
+}
+/** A single chat turn kept in UI state and sent to the backend as history. */
+export interface Message {
+  role: "user" | "model";
+  text: string;
+  timestamp: number;
+  audio?: string; // base64 audio string (for user messages)
+  aiAudio?: string; // base64 WAV audio string (for model responses)
+}
+/** JSON body returned by POST /api/chat; only `text` is read by this module. */
+interface ChatResponse {
+  text?: string;
+  transcript?: string;
+  error?: string;
+}
+/** JSON body returned by POST /api/speech; `audio` is read as a base64 string. */
+interface SpeechResponse {
+  audio?: string;
+  error?: string;
+}
+/**
+ * Sends the conversation to the Space backend and returns the model's reply text.
+ *
+ * Only `role`, `text` and `timestamp` of each history entry are transmitted. The
+ * base64 `audio`/`aiAudio` blobs stay in the browser: the backend builds its prompt
+ * from message text only, so resending every recording on each turn would just
+ * inflate the request body.
+ *
+ * @param history - Conversation so far, including the new user message.
+ * @param mode - Which system prompt the backend should use.
+ * @param audioBase64 - Optional base64 recording for the current turn, transcribed server-side.
+ * @returns The reply text, or a fixed fallback sentence on an empty reply or any failure.
+ */
+export async function chatWithSpaceModel(
+  history: Message[],
+  mode: ChatMode,
+  audioBase64?: string
+) {
+  try {
+    const textHistory = history.map(({ role, text, timestamp }) => ({ role, text, timestamp }));
+    const response = await fetch("/api/chat", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ history: textHistory, mode, audioBase64 }),
+    });
+    if (!response.ok) {
+      throw new Error(`Chat request failed: ${response.status}`);
+    }
+    const data = (await response.json()) as ChatResponse;
+    return data.text || "喂？听得到吗？我刚才卡了一下。";
+  } catch (error) {
+    console.error("HF Space chat error:", error);
+    return "抱歉，我现在的本地模型卡住了，稍等一下再试。";
+  }
+}
+/**
+ * Requests speech synthesis for `text` from the Space backend.
+ *
+ * @param text - Text to synthesize.
+ * @returns Base64-encoded audio, or `null` when the response has no audio or the request fails.
+ */
+export async function generateSpeech(text: string) {
+  let payload: SpeechResponse;
+  try {
+    const response = await fetch("/api/speech", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ text }),
+    });
+    if (!response.ok) {
+      throw new Error(`Speech request failed: ${response.status}`);
+    }
+    payload = (await response.json()) as SpeechResponse;
+  } catch (error) {
+    console.error("HF Space TTS error:", error);
+    return null;
+  }
+  return payload.audio || null;
+}

vite.config.ts CHANGED Viewed

@@ -1,15 +1,11 @@
 import tailwindcss from '@tailwindcss/vite';
 import react from '@vitejs/plugin-react';
 import path from 'path';
-import {defineConfig, loadEnv} from 'vite';
-export default defineConfig(({mode}) => {
-  const env = loadEnv(mode, '.', '');
   return {
     plugins: [react(), tailwindcss()],
-    define: {
-      'process.env.GEMINI_API_KEY': JSON.stringify(env.GEMINI_API_KEY),
-    },
     resolve: {
       alias: {
         '@': path.resolve(__dirname, '.'),

 import tailwindcss from '@tailwindcss/vite';
 import react from '@vitejs/plugin-react';
 import path from 'path';
+import {defineConfig} from 'vite';
+export default defineConfig(() => {
   return {
     plugins: [react(), tailwindcss()],
     resolve: {
       alias: {
         '@': path.resolve(__dirname, '.'),