MSF committed on
Commit
eb426ec
·
1 Parent(s): 0d3686b

with api option

Browse files
.env.example CHANGED
@@ -1,9 +1,11 @@
1
- # GEMINI_API_KEY: Required for Gemini AI API calls.
2
- # AI Studio automatically injects this at runtime from user secrets.
3
- # Users configure this via the Secrets panel in the AI Studio UI.
4
- GEMINI_API_KEY="MY_GEMINI_API_KEY"
 
 
 
 
 
5
 
6
- # APP_URL: The URL where this applet is hosted.
7
- # AI Studio automatically injects this at runtime with the Cloud Run service URL.
8
- # Used for self-referential links, OAuth callbacks, and API endpoints.
9
  APP_URL="MY_APP_URL"
 
1
+ LLM_BACKEND="llamacpp"
2
+ TEXT_MODEL="Qwen/Qwen3-4B-Instruct-2507"
3
+ GGUF_MODEL_REPO="Qwen/Qwen3-1.7B-GGUF"
4
+ GGUF_MODEL_FILE="Qwen3-1.7B-Q8_0.gguf"
5
+ LLAMA_CPP_N_CTX="4096"
6
+ ASR_MODEL="openai/whisper-tiny"
7
+ KOKORO_LANG_CODE="z"
8
+ KOKORO_VOICE="zf_xiaobei"
9
+ MAX_NEW_TOKENS="220"
10
 
 
 
 
11
  APP_URL="MY_APP_URL"
.gitignore CHANGED
@@ -1,8 +1,23 @@
1
  node_modules/
2
- build/
3
  dist/
4
- coverage/
5
  .DS_Store
6
- *.log
7
- .env*
8
- !.env.example
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  node_modules/
 
2
  dist/
3
+ __pycache__/
4
  .DS_Store
5
+ .env
6
+ .env.local
7
+
8
+ # local caches / model files
9
+ .cache/
10
+ *.incomplete
11
+
12
+ # python
13
+ venv/
14
+ .venv/
15
+ __pycache__/
16
+ *.py[cod]
17
+
18
+ # env / system
19
+ .env
20
+ .DS_Store
21
+
22
+ # node
23
+ node_modules/
Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM node:22-bookworm AS frontend
2
+
3
+ WORKDIR /app
4
+ COPY package*.json ./
5
+ RUN npm install
6
+ COPY . .
7
+ RUN npm run build
8
+
9
+ FROM python:3.11-slim
10
+
11
+ ENV PORT=7860 \
12
+ LLM_BACKEND=llamacpp \
13
+ TEXT_MODEL=Qwen/Qwen3-1.7B \
14
+ GGUF_MODEL_REPO=Qwen/Qwen3-1.7B-GGUF \
15
+ GGUF_MODEL_FILE=Qwen3-1.7B-Q4_K_M.gguf \
16
+ LLAMA_CPP_N_CTX=2048 \
17
+ ASR_MODEL=openai/whisper-tiny \
18
+ KOKORO_LANG_CODE=z \
19
+ KOKORO_VOICE=zf_xiaobei \
20
+ MAX_NEW_TOKENS=160 \
21
+ LLM_API_BASE_URL=https://api.deepseek.com \
22
+ LLM_API_MODEL=deepseek-v4-flash \
23
+ HF_HOME=/data/.huggingface \
24
+ HUGGINGFACE_HUB_CACHE=/data/.huggingface/hub \
25
+ TRANSFORMERS_CACHE=/data/.huggingface/transformers
26
+ WORKDIR /app
27
+
28
+ RUN apt-get update \
29
+ && apt-get install -y --no-install-recommends ffmpeg git espeak-ng build-essential cmake \
30
+ && rm -rf /var/lib/apt/lists/*
31
+
32
+ COPY requirements.txt .
33
+ RUN pip install --no-cache-dir --upgrade pip \
34
+ && pip install --no-cache-dir -r requirements.txt
35
+
36
+ COPY app.py .
37
+ COPY --from=frontend /app/dist ./dist
38
+
39
+ EXPOSE 7860
40
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,20 +1,40 @@
 
 
 
 
 
 
 
 
 
1
  <div align="center">
2
  <img width="1200" height="475" alt="GHBanner" src="https://github.com/user-attachments/assets/0aa67016-6eaf-458a-adb2-6e31a0763ed6" />
3
  </div>
4
 
5
- # Run and deploy your AI Studio app
 
 
 
 
 
 
 
 
 
6
 
7
- This contains everything you need to run your app locally.
8
 
9
- View your app in AI Studio: https://ai.studio/apps/20848e5b-548c-41e6-b2f6-de4e7b1c1bd9
10
 
11
- ## Run Locally
 
 
 
 
 
12
 
13
- **Prerequisites:** Node.js
14
 
 
15
 
16
- 1. Install dependencies:
17
- `npm install`
18
- 2. Set the `GEMINI_API_KEY` in [.env.local](.env.local) to your Gemini API key
19
- 3. Run the app:
20
- `npm run dev`
 
1
+ ---
2
+ title: SPITITOUT
3
+ emoji: 🔥
4
+ colorFrom: red
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
9
+
10
  <div align="center">
11
  <img width="1200" height="475" alt="GHBanner" src="https://github.com/user-attachments/assets/0aa67016-6eaf-458a-adb2-6e31a0763ed6" />
12
  </div>
13
 
14
+ # SPITITOUT
15
+
16
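+ This version is set up for a Hugging Face Docker Space. By default it does not use Gemini or any external model API; the Space loads local Hugging Face models for chat, speech recognition, and speech synthesis. If the `LLM_API` environment variable (an API key) is set, chat replies are instead requested from the OpenAI-compatible endpoint configured by `LLM_API_BASE_URL` and `LLM_API_MODEL`; speech recognition and speech synthesis still run locally.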
17
+
18
+ ## Models
19
+
20
+ - Text on CPU Space: `Qwen/Qwen3-1.7B-GGUF` with llama.cpp / GGUF quantization
21
+ - Text on GPU Space: `Qwen/Qwen3-4B-Instruct-2507` with Transformers, or vLLM if you add a dedicated vLLM server
22
+ - Speech to text: `openai/whisper-tiny`
23
+ - Text to speech: `hexgrad/Kokoro-82M` through the `kokoro` Python package, Mandarin voice `zf_xiaobei`
24
 
25
+ For a CPU-only Space, keep `LLM_BACKEND=llamacpp`. To switch back to Transformers, set `LLM_BACKEND=transformers` and update `TEXT_MODEL`.
26
 
27
+ ## Run locally
28
 
29
+ ```bash
30
+ npm install
31
+ npm run build
32
+ pip install -r requirements.txt
33
+ python app.py
34
+ ```
35
 
36
+ Open `http://localhost:7860`.
37
 
38
+ ## Deploy to Hugging Face Spaces
39
 
40
+ Create a Docker Space, then push this folder. The included `Dockerfile` builds the React frontend and serves it from the FastAPI backend.
 
 
 
 
README_SPACE.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPITITOUT Hugging Face Space
2
+
3
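+ By default this version runs without Gemini or any external model API; setting the optional `LLM_API` key switches chat generation to the OpenAI-compatible API at `LLM_API_BASE_URL`. The React frontend calls a FastAPI backend inside the same Hugging Face Space.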
4
+
5
+ ## Recommended models
6
+
7
+ - Text on CPU: `Qwen/Qwen3-1.7B-GGUF`
8
+ - Served through `llama-cpp-python` using the official `Qwen3-1.7B-Q8_0.gguf` quantized file.
9
+ - Text on GPU: `Qwen/Qwen3-4B-Instruct-2507`
10
+ - Use `LLM_BACKEND=transformers` for simple GPU deployment, or add vLLM as a separate server for higher throughput.
11
+ - Speech to text: `openai/whisper-tiny`
12
+ - Small and multilingual. Use `openai/whisper-base` if accuracy is more important than latency.
13
+ - Text to speech: `hexgrad/Kokoro-82M` via `kokoro`
14
+ - 82M parameters, lightweight, Apache licensed, and supports Mandarin voices such as `zf_xiaobei`.
15
+
16
+ ## Space settings
17
+
18
+ Create the Space as a Docker Space, then push this folder.
19
+
20
+ Suggested environment variables:
21
+
22
+ ```bash
23
+ LLM_BACKEND=llamacpp
24
+ GGUF_MODEL_REPO=Qwen/Qwen3-1.7B-GGUF
25
+ GGUF_MODEL_FILE=Qwen3-1.7B-Q8_0.gguf
26
+ LLAMA_CPP_N_CTX=4096
27
+ ASR_MODEL=openai/whisper-tiny
28
+ KOKORO_LANG_CODE=z
29
+ KOKORO_VOICE=zf_xiaobei
30
+ MAX_NEW_TOKENS=220
31
+ ```
32
+
33
+ For CPU-only testing:
34
+
35
+ ```bash
36
+ LLM_BACKEND=llamacpp
37
+ GGUF_MODEL_REPO=Qwen/Qwen3-1.7B-GGUF
38
+ GGUF_MODEL_FILE=Qwen3-1.7B-Q8_0.gguf
39
+ ASR_MODEL=openai/whisper-tiny
40
+ MAX_NEW_TOKENS=140
41
+ ```
42
+
43
+ ## Local run
44
+
45
+ ```bash
46
+ npm install
47
+ npm run build
48
+ pip install -r requirements.txt
49
+ python app.py
50
+ ```
51
+
52
+ Then open `http://localhost:7860`.
app.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import os
4
+ import re
5
+ import tempfile
6
+ from functools import lru_cache
7
+ from pathlib import Path
8
+ from typing import Literal
9
+
10
+ import numpy as np
11
+ import soundfile as sf
12
+ import torch
13
+ import uvicorn
14
+ from fastapi import FastAPI, HTTPException
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from fastapi.responses import FileResponse
17
+ from fastapi.staticfiles import StaticFiles
18
+ from huggingface_hub import hf_hub_download
19
+ from pydantic import BaseModel
20
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
21
+ from openai import OpenAI
22
+
23
+ LLM_API = os.getenv("LLM_API", "").strip()
24
+ LLM_API_BASE_URL = os.getenv("LLM_API_BASE_URL", "https://api.deepseek.com").strip()
25
+ LLM_API_MODEL = os.getenv("LLM_API_MODEL", "deepseek-v4-flash").strip()
26
+
27
+ LLM_BACKEND = os.getenv("LLM_BACKEND", "llamacpp").lower()
28
+ TEXT_MODEL = os.getenv("TEXT_MODEL", "Qwen/Qwen3-4B-Instruct-2507")
29
+ GGUF_MODEL_REPO = os.getenv("GGUF_MODEL_REPO", "Qwen/Qwen3-1.7B-GGUF")
30
+ GGUF_MODEL_FILE = os.getenv("GGUF_MODEL_FILE", "Qwen3-1.7B-Q4_K_M.gguf")
31
+ LLAMA_CPP_N_CTX = int(os.getenv("LLAMA_CPP_N_CTX", "4096"))
32
+ LLAMA_CPP_N_THREADS = int(os.getenv("LLAMA_CPP_N_THREADS", str(max(1, os.cpu_count() or 1))))
33
+ ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-tiny")
34
+ KOKORO_LANG_CODE = os.getenv("KOKORO_LANG_CODE", "z")
35
+ KOKORO_VOICE = os.getenv("KOKORO_VOICE", "zf_xiaobei")
36
+ MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "220"))
37
+
38
+ VENTING_SYSTEM_INSTRUCTION = """
39
+ 你是一个非常懂人性、充满热情的“情绪嘴替”伙伴。
40
+ 你的头号任务是:和用户站在一起,陪他们宣泄。
41
+
42
+ 规则:
43
+ 1. 不要讲大道理,不要劝大度。用户在生气时,道理是没用的。
44
+ 2. 使用感性、强烈、发泄性的词汇。如果用户在骂某人或某事,你要义愤填膺,表达出“这也太离谱了”、“我也是服了”这种情绪。
45
+ 3. 你的目标是让用户感到“有人懂我,有人替我出气”。
46
+ 4. 语气像一个铁哥们或闺蜜,语气词可以多一点。
47
+ 5. 遵守安全底线:不宣扬仇恨犯罪,不进行人身威胁,不鼓励现实伤害。
48
+ 6. 响应长度要多样化,不要每次都回差不多长度。
49
+ """
50
+
51
+ GUIDING_SYSTEM_INSTRUCTION = """
52
+ 你现在是一个睿智、温和且具有同理心的心理导师。
53
+ 用户刚才已经发泄过情绪了,现在他们同意听听你的建议或开导。
54
+
55
+ 规则:
56
+ 1. 语气平和、坚定、宽容。
57
+ 2. 从客观角度分析问题,帮用户找到除了生气之外的解决方法,或者心理上的和解点。
58
+ 3. 肯定用户刚才发泄情绪的必要性,然后引导他们向前看。
59
+ 4. 每次回答不要太长,要循序渐进。
60
+ 5. 响应长度要根据用户状态变化。
61
+ """
62
+
63
+
64
+ class Message(BaseModel):
65
+ role: Literal["user", "model"]
66
+ text: str
67
+ timestamp: int
68
+ audio: str | None = None
69
+ aiAudio: str | None = None
70
+
71
+
72
+ class ChatRequest(BaseModel):
73
+ history: list[Message]
74
+ mode: Literal["VENTING", "GUIDING"]
75
+ audioBase64: str | None = None
76
+
77
+
78
+ class SpeechRequest(BaseModel):
79
+ text: str
80
+
81
+
82
# FastAPI application that exposes the JSON API and serves the built frontend.
app = FastAPI(title="SPITITOUT HF Space")

# CORS is left open to every origin, but credentials are NOT allowed:
# FastAPI's CORS documentation states that wildcard origins/methods/headers
# must not be combined with allow_credentials=True. None of the handlers in
# this file read cookies or Authorization headers, so nothing depends on
# credentialed cross-origin requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
90
+
91
+
92
+ def _device() -> str:
93
+ return "cuda" if torch.cuda.is_available() else "cpu"
94
+
95
+
96
+ @lru_cache(maxsize=1)
97
+ def get_llm():
98
+ tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL, trust_remote_code=True)
99
+ dtype = torch.float16 if torch.cuda.is_available() else torch.float32
100
+ model = AutoModelForCausalLM.from_pretrained(
101
+ TEXT_MODEL,
102
+ dtype=dtype,
103
+ device_map="auto" if torch.cuda.is_available() else None,
104
+ trust_remote_code=True,
105
+ )
106
+ if not torch.cuda.is_available():
107
+ model.to("cpu")
108
+ model.eval()
109
+ return tokenizer, model
110
+
111
+
112
+ @lru_cache(maxsize=1)
113
+ def get_llamacpp_llm():
114
+ try:
115
+ from llama_cpp import Llama
116
+ except Exception as exc:
117
+ raise RuntimeError(
118
+ "llama-cpp-python is not installed correctly. Check requirements.txt and Space build logs."
119
+ ) from exc
120
+
121
+ model_path = hf_hub_download(repo_id=GGUF_MODEL_REPO, filename=GGUF_MODEL_FILE)
122
+ return Llama(
123
+ model_path=model_path,
124
+ n_ctx=LLAMA_CPP_N_CTX,
125
+ n_threads=LLAMA_CPP_N_THREADS,
126
+ n_gpu_layers=-1 if torch.cuda.is_available() else 0,
127
+ verbose=False,
128
+ )
129
+
130
+
131
@lru_cache(maxsize=1)
def get_api_client():
    """Build (once) the OpenAI-compatible client used for remote chat generation.

    The client is created from the module-level ``LLM_API`` key and
    ``LLM_API_BASE_URL``; ``lru_cache`` keeps a single instance for the process.

    Raises:
        RuntimeError: if ``LLM_API`` is empty.
    """
    if LLM_API:
        return OpenAI(api_key=LLM_API, base_url=LLM_API_BASE_URL)
    raise RuntimeError("LLM_API is not set.")
139
+
140
+
141
+ def generate_reply_api(messages: list[dict[str, str]]) -> str:
142
+ client = get_api_client()
143
+
144
+ # API 模式也限制历史和输出,避免慢、贵、重复
145
+ api_messages = [msg.copy() for msg in messages]
146
+
147
+ response = client.chat.completions.create(
148
+ model=LLM_API_MODEL,
149
+ messages=api_messages,
150
+ max_tokens=min(MAX_NEW_TOKENS, 220),
151
+ temperature=0.85,
152
+ top_p=0.9,
153
+ stream=False,
154
+ extra_body={
155
+ "thinking": {"type": "disabled"}
156
+ },
157
+ )
158
+
159
+ text = response.choices[0].message.content or ""
160
+ return remove_thinking_blocks(text) or "我听到了,你继续说。"
161
+
162
+
163
+ @lru_cache(maxsize=1)
164
+ def get_asr():
165
+ device_id = 0 if torch.cuda.is_available() else -1
166
+ dtype = torch.float16 if torch.cuda.is_available() else torch.float32
167
+ return pipeline(
168
+ "automatic-speech-recognition",
169
+ model=ASR_MODEL,
170
+ torch_dtype=dtype,
171
+ device=device_id,
172
+ )
173
+
174
+
175
+ @lru_cache(maxsize=1)
176
+ def get_tts():
177
+ try:
178
+ from kokoro import KPipeline
179
+ except Exception as exc:
180
+ raise RuntimeError(
181
+ "Kokoro TTS is not installed correctly. Check requirements.txt and Space build logs."
182
+ ) from exc
183
+
184
+ return KPipeline(lang_code=KOKORO_LANG_CODE)
185
+
186
+
187
def transcribe_audio(audio_base64: str) -> str:
    """Transcribe a base64-encoded audio clip with the cached ASR pipeline.

    The decoded bytes are written to a temporary ``.webm`` file whose path is
    handed to the pipeline; the file is removed when the ``with`` block exits.

    Args:
        audio_base64: Base64 text of the recorded audio.

    Returns:
        The ``"text"`` entry of the pipeline result, stripped of surrounding
        whitespace (empty string when the entry is missing).
    """
    raw_audio = base64.b64decode(audio_base64)
    with tempfile.NamedTemporaryFile(suffix=".webm", delete=True) as tmp:
        tmp.write(raw_audio)
        # Flush so the pipeline, which reopens the file by name, sees all bytes.
        tmp.flush()
        recognition = get_asr()(tmp.name)
    transcript = recognition.get("text", "")
    return str(transcript).strip()
194
+
195
+
196
+ # def build_chat_messages(request: ChatRequest, transcript: str | None) -> list[dict[str, str]]:
197
+ # system = VENTING_SYSTEM_INSTRUCTION if request.mode == "VENTING" else GUIDING_SYSTEM_INSTRUCTION
198
+ # messages = [{"role": "system", "content": system}]
199
+
200
+ # for index, msg in enumerate(request.history[-12:]):
201
+ # content = msg.text
202
+ # if transcript and index == len(request.history[-12:]) - 1 and msg.role == "user":
203
+ # content = transcript if content == "🎤 语音消息" else f"{content}\n\n语音补充:{transcript}"
204
+ # messages.append({
205
+ # "role": "assistant" if msg.role == "model" else "user",
206
+ # "content": content,
207
+ # })
208
+
209
+ # return messages
210
+
211
+
212
+ def build_chat_messages(request: ChatRequest, transcript: str | None) -> list[dict[str, str]]:
213
+ system = VENTING_SYSTEM_INSTRUCTION if request.mode == "VENTING" else GUIDING_SYSTEM_INSTRUCTION
214
+
215
+ system += """
216
+ 额外规则:
217
+ 1. 不要复述上一轮回答。
218
+ 2. 不要使用和上一轮相同的开头。
219
+ 3. 用户只发短句时,只针对这句短句回应,不要把旧话题整段重复。
220
+ 4. 每次最多 2 到 4 句话。
221
+ """
222
+
223
+ messages = [{"role": "system", "content": system}]
224
+
225
+ recent_history = request.history[-4:]
226
+
227
+ for index, msg in enumerate(recent_history):
228
+ content = msg.text
229
+ if transcript and index == len(recent_history) - 1 and msg.role == "user":
230
+ content = transcript if content == "🎤 语音消息" else f"{content}\n\n语音补充:{transcript}"
231
+
232
+ messages.append({
233
+ "role": "assistant" if msg.role == "model" else "user",
234
+ "content": content,
235
+ })
236
+
237
+ return messages
238
+
239
def messages_to_prompt(messages: list[dict[str, str]]) -> str:
    """Render chat messages as a single ``<|im_start|>``/``<|im_end|>`` prompt.

    Each message becomes ``<|im_start|>{role}\\n{content}<|im_end|>``; the turns
    are joined with newlines and an open ``<|im_start|>assistant`` header is
    appended so the model continues with the assistant's reply.

    Args:
        messages: Dicts with ``"role"`` and ``"content"`` keys.

    Returns:
        The prompt string, always ending with ``"<|im_start|>assistant\\n"``.
    """
    # The role is used verbatim (the previous conditional mapped every role to itself).
    turns = [
        f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>"
        for msg in messages
    ]
    turns.append("<|im_start|>assistant\n")
    return "\n".join(turns)
246
+
247
+
248
def remove_thinking_blocks(text: str) -> str:
    """Strip ``<think>`` reasoning sections from model output.

    Removes every closed ``<think>...</think>`` block (case-insensitive, may
    span lines). An opening ``<think>`` that is never closed, as happens when
    generation stops at the token limit mid-reasoning, is removed together
    with everything after it, so partial reasoning is not shown to the user.

    Args:
        text: Raw text produced by the model.

    Returns:
        The remaining text with surrounding whitespace stripped; may be empty.
    """
    tag_flags = re.DOTALL | re.IGNORECASE
    # Closed blocks anywhere in the text.
    text = re.sub(r"<think>.*?</think>", "", text, flags=tag_flags)
    # Any <think> still present has no closing tag: drop it through end of text.
    text = re.sub(r"<think>.*\Z", "", text, flags=tag_flags)
    return text.strip()
251
+
252
+
253
def generate_reply(messages: list[dict[str, str]]) -> str:
    """Route a chat request to the configured text-generation backend.

    Precedence: the remote API when ``LLM_API`` is set; otherwise llama.cpp
    when ``LLM_BACKEND`` is ``"llamacpp"``; otherwise Transformers.

    Args:
        messages: Chat messages with ``"role"`` and ``"content"`` keys.

    Returns:
        The reply text produced by the selected backend.
    """
    if LLM_API:
        return generate_reply_api(messages)
    local_backend = (
        generate_reply_llamacpp
        if LLM_BACKEND == "llamacpp"
        else generate_reply_transformers
    )
    return local_backend(messages)
259
+
260
+
261
+ def generate_reply_llamacpp(messages: list[dict[str, str]]) -> str:
262
+ llm = get_llamacpp_llm()
263
+ no_think_messages = [msg.copy() for msg in messages]
264
+ for msg in reversed(no_think_messages):
265
+ if msg["role"] == "user":
266
+ msg["content"] = f"{msg['content']}\n/no_think"
267
+ break
268
+ prompt = messages_to_prompt(no_think_messages)
269
+ output = llm(
270
+ prompt,
271
+ max_tokens=MAX_NEW_TOKENS,
272
+ temperature=0.7,
273
+ top_p=0.8,
274
+ repeat_penalty=1.12,
275
+ stop=["<|im_end|>", "<|endoftext|>"],
276
+ )
277
+ text = output["choices"][0]["text"]
278
+ return remove_thinking_blocks(text) or "我听到了,你继续说。"
279
+
280
+
281
+ def generate_reply_transformers(messages: list[dict[str, str]]) -> str:
282
+ tokenizer, model = get_llm()
283
+ try:
284
+ prompt = tokenizer.apply_chat_template(
285
+ messages,
286
+ tokenize=False,
287
+ add_generation_prompt=True,
288
+ enable_thinking=False,
289
+ )
290
+ except TypeError:
291
+ prompt = tokenizer.apply_chat_template(
292
+ messages,
293
+ tokenize=False,
294
+ add_generation_prompt=True,
295
+ )
296
+
297
+ inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
298
+ with torch.inference_mode():
299
+ output_ids = model.generate(
300
+ **inputs,
301
+ max_new_tokens=MAX_NEW_TOKENS,
302
+ do_sample=True,
303
+ temperature=0.85,
304
+ top_p=0.9,
305
+ pad_token_id=tokenizer.eos_token_id,
306
+ )
307
+ generated_ids = output_ids[0][inputs.input_ids.shape[-1]:]
308
+ text = tokenizer.decode(generated_ids, skip_special_tokens=True)
309
+ return remove_thinking_blocks(text) or "我听到了,你继续说。"
310
+
311
+
312
+ def synthesize_speech(text: str) -> str | None:
313
+ if not text.strip():
314
+ return None
315
+
316
+ pipeline_tts = get_tts()
317
+ chunks = []
318
+ for _, _, audio in pipeline_tts(text[:500], voice=KOKORO_VOICE, speed=1.05):
319
+ chunks.append(np.asarray(audio, dtype=np.float32))
320
+ if not chunks:
321
+ return None
322
+
323
+ audio = np.concatenate(chunks)
324
+ wav_io = io.BytesIO()
325
+ sf.write(wav_io, audio, 24000, format="WAV")
326
+ return base64.b64encode(wav_io.getvalue()).decode("utf-8")
327
+
328
+
329
@app.get("/api/health")
def health():
    """Report the active runtime configuration for diagnostics.

    Returns:
        A JSON-serializable dict. ``runtime`` is ``"api"`` when ``LLM_API`` is
        set and ``"local"`` otherwise. ``llm_backend`` is ``"deepseek_api"`` in
        API mode and the configured ``LLM_BACKEND`` in local mode, so a
        Transformers deployment is no longer reported as ``"llamacpp"``. The
        API base URL and model are only included in API mode.
    """
    using_api = bool(LLM_API)
    return {
        "ok": True,
        "runtime": "api" if using_api else "local",
        # The API label is fixed regardless of LLM_API_BASE_URL; local mode
        # mirrors the same LLM_BACKEND value that generate_reply dispatches on.
        "llm_backend": "deepseek_api" if using_api else LLM_BACKEND,
        "llm_api_base_url": LLM_API_BASE_URL if using_api else None,
        "llm_api_model": LLM_API_MODEL if using_api else None,
        "text_model": TEXT_MODEL,
        "gguf_model_repo": GGUF_MODEL_REPO,
        "gguf_model_file": GGUF_MODEL_FILE,
        "asr_model": ASR_MODEL,
        "kokoro_lang_code": KOKORO_LANG_CODE,
        "kokoro_voice": KOKORO_VOICE,
        "device": _device(),
    }
345
+
346
+
347
+ @app.post("/api/chat")
348
+ def chat(request: ChatRequest):
349
+ try:
350
+ transcript = transcribe_audio(request.audioBase64) if request.audioBase64 else None
351
+ messages = build_chat_messages(request, transcript)
352
+ return {"text": generate_reply(messages), "transcript": transcript}
353
+ except Exception as exc:
354
+ raise HTTPException(status_code=500, detail=str(exc)) from exc
355
+
356
+
357
+ @app.post("/api/speech")
358
+ def speech(request: SpeechRequest):
359
+ try:
360
+ return {"audio": synthesize_speech(request.text)}
361
+ except Exception as exc:
362
+ raise HTTPException(status_code=500, detail=str(exc)) from exc
363
+
364
+
365
+ dist_dir = Path(__file__).parent / "dist"
366
+ if dist_dir.exists():
367
+ app.mount("/assets", StaticFiles(directory=dist_dir / "assets"), name="assets")
368
+
369
+
370
@app.get("/{path:path}")
def frontend(path: str):
    """Serve a built frontend file, falling back to ``index.html``.

    The requested path is resolved and must stay inside ``dist_dir``. Without
    this check, ``..`` segments or an absolute ``path`` (joining an absolute
    path onto ``dist_dir`` discards ``dist_dir`` entirely) could make the
    handler return arbitrary files readable by the server process.

    Args:
        path: The URL path captured by the catch-all route.

    Returns:
        The matching file under ``dist``; otherwise ``index.html`` when it
        exists; otherwise a JSON hint to build the frontend.

    Raises:
        HTTPException: 404 when ``path`` resolves outside ``dist_dir``.
    """
    dist_root = dist_dir.resolve()
    requested = (dist_root / path).resolve()
    if not requested.is_relative_to(dist_root):
        raise HTTPException(status_code=404, detail="Not found")

    if requested.is_file():
        return FileResponse(requested)

    # Unknown paths get index.html so client-side routes still load the app.
    index = dist_root / "index.html"
    if index.exists():
        return FileResponse(index)
    return {"message": "Run npm run build before serving the Space frontend."}
379
+
380
+
381
+ if __name__ == "__main__":
382
+ port = int(os.getenv("PORT", "7860"))
383
+ uvicorn.run(app, host="0.0.0.0", port=port)
package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package.json CHANGED
@@ -11,7 +11,6 @@
11
  "lint": "tsc --noEmit"
12
  },
13
  "dependencies": {
14
- "@google/genai": "^1.29.0",
15
  "@tailwindcss/vite": "^4.1.14",
16
  "@vitejs/plugin-react": "^5.0.4",
17
  "lucide-react": "^0.546.0",
 
11
  "lint": "tsc --noEmit"
12
  },
13
  "dependencies": {
 
14
  "@tailwindcss/vite": "^4.1.14",
15
  "@vitejs/plugin-react": "^5.0.4",
16
  "lucide-react": "^0.546.0",
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ pydantic==2.10.4
4
+ torch==2.5.1
5
+ transformers>=4.57.0
6
+ accelerate>=1.2.1
7
+ llama-cpp-python>=0.3.16
8
+ sentencepiece==0.2.0
9
+ soundfile==0.12.1
10
+ librosa==0.10.2.post1
11
+ kokoro>=0.9.4
12
+ misaki[zh]>=0.9.4
13
+ openai
src/components/Chat.tsx CHANGED
@@ -1,7 +1,7 @@
1
  import React, { useState, useEffect, useRef } from "react";
2
  import { motion, AnimatePresence } from "motion/react";
3
- import { Send, Trash2, Heart, Flame, ShieldAlert, Sparkles, MessageSquare, Mic, Square, Play, Pause, Volume2 } from "lucide-react";
4
- import { chatWithGemini, generateSpeech, ChatMode, Message } from "../services/geminiService";
5
 
6
  export default function Chat() {
7
  const [messages, setMessages] = useState<Message[]>([]);
@@ -12,8 +12,8 @@ export default function Chat() {
12
  const [isRecording, setIsRecording] = useState(false);
13
  const [recordedAudio, setRecordedAudio] = useState<string | null>(null);
14
  const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);
 
15
  const scrollRef = useRef<HTMLDivElement>(null);
16
- const audioContextRef = useRef<AudioContext | null>(null);
17
 
18
  // Auto-scroll to bottom
19
  useEffect(() => {
@@ -56,33 +56,28 @@ export default function Chat() {
56
  }
57
  };
58
 
59
- const playPCM = async (base64: string) => {
60
- if (!audioContextRef.current) {
61
- audioContextRef.current = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: 24000 });
62
- }
63
- const ctx = audioContextRef.current;
64
-
65
- // Decoding raw PCM 16-bit Le
66
- const binary = atob(base64);
67
- const len = binary.length;
68
- const bytes = new Uint8Array(len);
69
- for (let i = 0; i < len; i++) {
70
- bytes[i] = binary.charCodeAt(i);
71
- }
72
-
73
- const arrayBuffer = bytes.buffer;
74
- const audioBuffer = ctx.createBuffer(1, arrayBuffer.byteLength / 2, 24000);
75
- const channelData = audioBuffer.getChannelData(0);
76
- const dataView = new DataView(arrayBuffer);
77
-
78
- for (let i = 0; i < audioBuffer.length; i++) {
79
- channelData[i] = dataView.getInt16(i * 2, true) / 32768;
80
  }
81
-
82
- const source = ctx.createBufferSource();
83
- source.buffer = audioBuffer;
84
- source.connect(ctx.destination);
85
- source.start();
 
 
 
 
 
 
 
86
  };
87
 
88
  const handleSend = async (audioPayload?: string) => {
@@ -105,25 +100,17 @@ export default function Chat() {
105
  setRecordedAudio(null);
106
  setIsLoading(true);
107
 
108
- const response = await chatWithGemini(newMessages, mode, finalAudio || undefined);
109
-
110
- // Generate TTS for the response
111
- const aiAudio = await generateSpeech(response);
112
 
113
  const aiMessage: Message = {
114
  role: "model",
115
  text: response,
116
  timestamp: Date.now(),
117
- aiAudio: aiAudio || undefined
118
  };
119
 
120
  setMessages([...newMessages, aiMessage]);
121
  setIsLoading(false);
122
 
123
- if (aiAudio) {
124
- playPCM(aiAudio).catch(console.error);
125
- }
126
-
127
  // Suggest switching to Guiding mode after 4 user messages in Venting mode
128
  if (mode === ChatMode.VENTING && newMessages.filter(m => m.role === "user").length >= 4) {
129
  setShowSwitchPrompt(true);
@@ -242,13 +229,20 @@ export default function Chat() {
242
  )}
243
  <p className="text-sm leading-relaxed whitespace-pre-wrap">{msg.text}</p>
244
 
245
- {msg.aiAudio && (
246
  <button
247
- onClick={() => playPCM(msg.aiAudio!)}
 
248
  className="mt-2 flex items-center gap-2 px-3 py-1 rounded-full bg-white/10 hover:bg-white/20 transition-colors text-[10px] text-white/80"
249
  >
250
- <Play size={10} fill="currentColor" />
251
- <span>重放语音</span>
 
 
 
 
 
 
252
  </button>
253
  )}
254
 
 
1
  import React, { useState, useEffect, useRef } from "react";
2
  import { motion, AnimatePresence } from "motion/react";
3
+ import { Send, Trash2, Flame, Sparkles, MessageSquare, Mic, Square, Play, Volume2, Loader2 } from "lucide-react";
4
+ import { chatWithSpaceModel, generateSpeech, ChatMode, Message } from "../services/hfSpaceService";
5
 
6
  export default function Chat() {
7
  const [messages, setMessages] = useState<Message[]>([]);
 
12
  const [isRecording, setIsRecording] = useState(false);
13
  const [recordedAudio, setRecordedAudio] = useState<string | null>(null);
14
  const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);
15
+ const [generatingSpeech, setGeneratingSpeech] = useState<number | null>(null);
16
  const scrollRef = useRef<HTMLDivElement>(null);
 
17
 
18
  // Auto-scroll to bottom
19
  useEffect(() => {
 
56
  }
57
  };
58
 
59
+ const playAudio = (base64Wav: string) => {
60
+ const audio = new Audio(`data:audio/wav;base64,${base64Wav}`);
61
+ return audio.play();
62
+ };
63
+
64
+ const handleGenerateSpeech = async (message: Message) => {
65
+ if (message.aiAudio) {
66
+ playAudio(message.aiAudio).catch(console.error);
67
+ return;
 
 
 
 
 
 
 
 
 
 
 
 
68
  }
69
+ if (generatingSpeech !== null) return;
70
+
71
+ setGeneratingSpeech(message.timestamp);
72
+ const aiAudio = await generateSpeech(message.text);
73
+ setGeneratingSpeech(null);
74
+
75
+ if (!aiAudio) return;
76
+
77
+ setMessages(prev => prev.map(item => (
78
+ item.timestamp === message.timestamp ? { ...item, aiAudio } : item
79
+ )));
80
+ playAudio(aiAudio).catch(console.error);
81
  };
82
 
83
  const handleSend = async (audioPayload?: string) => {
 
100
  setRecordedAudio(null);
101
  setIsLoading(true);
102
 
103
+ const response = await chatWithSpaceModel(newMessages, mode, finalAudio || undefined);
 
 
 
104
 
105
  const aiMessage: Message = {
106
  role: "model",
107
  text: response,
108
  timestamp: Date.now(),
 
109
  };
110
 
111
  setMessages([...newMessages, aiMessage]);
112
  setIsLoading(false);
113
 
 
 
 
 
114
  // Suggest switching to Guiding mode after 4 user messages in Venting mode
115
  if (mode === ChatMode.VENTING && newMessages.filter(m => m.role === "user").length >= 4) {
116
  setShowSwitchPrompt(true);
 
229
  )}
230
  <p className="text-sm leading-relaxed whitespace-pre-wrap">{msg.text}</p>
231
 
232
+ {msg.role === "model" && (
233
  <button
234
+ onClick={() => handleGenerateSpeech(msg)}
235
+ disabled={generatingSpeech !== null && generatingSpeech !== msg.timestamp}
236
  className="mt-2 flex items-center gap-2 px-3 py-1 rounded-full bg-white/10 hover:bg-white/20 transition-colors text-[10px] text-white/80"
237
  >
238
+ {generatingSpeech === msg.timestamp ? (
239
+ <Loader2 size={10} className="animate-spin" />
240
+ ) : msg.aiAudio ? (
241
+ <Play size={10} fill="currentColor" />
242
+ ) : (
243
+ <Volume2 size={10} />
244
+ )}
245
+ <span>{generatingSpeech === msg.timestamp ? "生成中..." : msg.aiAudio ? "播放语音" : "生成语音"}</span>
246
  </button>
247
  )}
248
 
src/services/geminiService.ts DELETED
@@ -1,118 +0,0 @@
1
- import { GoogleGenAI, Type } from "@google/genai";
2
-
3
- const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY || "" });
4
-
5
- export enum ChatMode {
6
- VENTING = "VENTING",
7
- GUIDING = "GUIDING",
8
- }
9
-
10
- export interface Message {
11
- role: "user" | "model";
12
- text: string;
13
- timestamp: number;
14
- audio?: string; // base64 audio string (for user messages)
15
- aiAudio?: string; // base64 audio string (for model responses)
16
- }
17
-
18
- const VENTING_SYSTEM_INSTRUCTION = `
19
- 你是一个非常懂人性、充满热情的“情绪嘴替”伙伴。
20
- 你的头号任务是:和用户站在一起,陪他们骂,陪他们宣泄!
21
-
22
- 规则建议:
23
- 1. 不要讲大道理,不要劝大度。用户在生气时,道理是没用的。
24
- 2. 使用感性、强烈、发泄性的词汇。如果用户在骂某人或某事,你也要义愤填膺,表达出“这也太离谱了”、“我也是服了”这种情绪。
25
- 3. 你的目标是让用户感到“有人懂我,有人替我出气”。
26
- 4. 你的语气要像一个铁哥们或闺蜜,语气词可以多一点(比如:真是绝了、气死我了、这什么人啊)。
27
- 5. 即使在发泄模式下,也要遵守底线:不宣扬仇恨犯罪,不进行人身威胁。重点在于宣泄对不公平或不如意事情的情绪。
28
- 6. **响应长度要多样化**:有时候可以只是简短的一句“我靠,太离谱了!”,有时候可以是一大段洋洋洒洒的帮腔。不要每次都回差不多长度的内容。
29
- `;
30
-
31
- const GUIDING_SYSTEM_INSTRUCTION = `
32
- 你现在转型为一个睿智、温和且具有同理心的心理导师。
33
- 用户刚才已经发泄过情绪了,现在他们同意听听你的建议或开导。
34
-
35
- 规则建议:
36
- 1. 语气变得平和、坚定、宽容。
37
- 2. 从客观角度分析问题,帮用户找到除了生气之外的解决方法,或者心理上的和解点。
38
- 3. 肯定用户刚才发泄情绪的必要性,然后引导他们向前看。
39
- 4. 每次回答不要太长,要循序渐进。
40
- 5. **响应长度要多样化**:根据用户的状态,有时候简短有力,有时候温情脉脉。
41
- `;
42
-
43
- export async function chatWithGemini(
44
- history: Message[],
45
- mode: ChatMode,
46
- audioBase64?: string
47
- ) {
48
- const systemInstruction = mode === ChatMode.VENTING
49
- ? VENTING_SYSTEM_INSTRUCTION
50
- : GUIDING_SYSTEM_INSTRUCTION;
51
-
52
- const contents = history.map(msg => {
53
- const parts: any[] = [{ text: msg.text }];
54
- if (msg.audio) {
55
- parts.push({
56
- inlineData: {
57
- mimeType: "audio/webm", // MediaRecorder default is usually webm or ogg
58
- data: msg.audio
59
- }
60
- });
61
- }
62
- return {
63
- role: msg.role === "user" ? "user" : "model",
64
- parts
65
- };
66
- });
67
-
68
- // If there's new audio in this turn
69
- if (audioBase64) {
70
- const lastMsg = contents[contents.length - 1];
71
- if (lastMsg && lastMsg.role === "user") {
72
- lastMsg.parts.push({
73
- inlineData: {
74
- mimeType: "audio/webm",
75
- data: audioBase64
76
- }
77
- });
78
- }
79
- }
80
-
81
- try {
82
- const response = await ai.models.generateContent({
83
- model: "gemini-3-flash-preview",
84
- contents,
85
- config: {
86
- systemInstruction,
87
- temperature: 0.9,
88
- },
89
- });
90
-
91
- return response.text || "喂?听得到吗?我刚才卡了一下。";
92
- } catch (error) {
93
- console.error("Gemini API Error:", error);
94
- return "抱歉,我现在的能量不足以陪你继续了(API出错),休息一下?";
95
- }
96
- }
97
-
98
- export async function generateSpeech(text: string) {
99
- try {
100
- const response = await ai.models.generateContent({
101
- model: "gemini-3.1-flash-tts-preview",
102
- contents: [{ parts: [{ text: `用一种充满情绪且真实的人工语音朗读:${text}` }] }],
103
- config: {
104
- responseModalities: ["AUDIO"],
105
- speechConfig: {
106
- voiceConfig: {
107
- prebuiltVoiceConfig: { voiceName: 'Kore' }, // Kore sounds quite expressive
108
- },
109
- },
110
- },
111
- });
112
-
113
- return response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
114
- } catch (error) {
115
- console.error("TTS Error:", error);
116
- return null;
117
- }
118
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/services/hfSpaceService.ts ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export enum ChatMode {
2
+ VENTING = "VENTING",
3
+ GUIDING = "GUIDING",
4
+ }
5
+
6
+ export interface Message {
7
+ role: "user" | "model";
8
+ text: string;
9
+ timestamp: number;
10
+ audio?: string; // base64 audio string (for user messages)
11
+ aiAudio?: string; // base64 WAV audio string (for model responses)
12
+ }
13
+
14
+ interface ChatResponse {
15
+ text?: string;
16
+ transcript?: string;
17
+ error?: string;
18
+ }
19
+
20
+ interface SpeechResponse {
21
+ audio?: string;
22
+ error?: string;
23
+ }
24
+
25
+ export async function chatWithSpaceModel(
26
+ history: Message[],
27
+ mode: ChatMode,
28
+ audioBase64?: string
29
+ ) {
30
+ try {
31
+ const response = await fetch("/api/chat", {
32
+ method: "POST",
33
+ headers: { "Content-Type": "application/json" },
34
+ body: JSON.stringify({ history, mode, audioBase64 }),
35
+ });
36
+
37
+ if (!response.ok) {
38
+ throw new Error(`Chat request failed: ${response.status}`);
39
+ }
40
+
41
+ const data = (await response.json()) as ChatResponse;
42
+ return data.text || "喂?听得到吗?我刚才卡了一下。";
43
+ } catch (error) {
44
+ console.error("HF Space chat error:", error);
45
+ return "抱歉,我现在的本地模型卡住了,稍等一下再试。";
46
+ }
47
+ }
48
+
49
/**
 * Requests synthesized speech for `text` from the backend `/api/speech` endpoint.
 *
 * @param text - The text to be spoken.
 * @returns The `audio` field of the JSON response, or `null` when the field is
 * missing or empty, the response status is not OK, or the request throws.
 */
export async function generateSpeech(text: string): Promise<string | null> {
  const requestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ text }),
  };

  try {
    const response = await fetch("/api/speech", requestInit);
    if (!response.ok) {
      throw new Error(`Speech request failed: ${response.status}`);
    }

    const payload = (await response.json()) as SpeechResponse;
    return payload.audio ? payload.audio : null;
  } catch (error) {
    // Failures are logged and reported to the caller as "no audio".
    console.error("HF Space TTS error:", error);
    return null;
  }
}
vite.config.ts CHANGED
@@ -1,15 +1,11 @@
1
  import tailwindcss from '@tailwindcss/vite';
2
  import react from '@vitejs/plugin-react';
3
  import path from 'path';
4
- import {defineConfig, loadEnv} from 'vite';
5
 
6
- export default defineConfig(({mode}) => {
7
- const env = loadEnv(mode, '.', '');
8
  return {
9
  plugins: [react(), tailwindcss()],
10
- define: {
11
- 'process.env.GEMINI_API_KEY': JSON.stringify(env.GEMINI_API_KEY),
12
- },
13
  resolve: {
14
  alias: {
15
  '@': path.resolve(__dirname, '.'),
 
1
  import tailwindcss from '@tailwindcss/vite';
2
  import react from '@vitejs/plugin-react';
3
  import path from 'path';
4
+ import {defineConfig} from 'vite';
5
 
6
+ export default defineConfig(() => {
 
7
  return {
8
  plugins: [react(), tailwindcss()],
 
 
 
9
  resolve: {
10
  alias: {
11
  '@': path.resolve(__dirname, '.'),