Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,9 @@ from google.genai import types
|
|
| 9 |
|
| 10 |
app = Flask(__name__)
|
| 11 |
|
|
|
|
| 12 |
# Config
|
|
|
|
| 13 |
MODEL = os.environ.get("GEMINI_MODEL", "gemini-3-flash-preview")
|
| 14 |
THINKING_LEVEL = os.environ.get("GEMINI_THINKING_LEVEL", "HIGH")
|
| 15 |
|
|
@@ -31,44 +33,10 @@ def _client_ip() -> str:
|
|
| 31 |
return request.headers.get("x-forwarded-for", request.remote_addr or "unknown")
|
| 32 |
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
return
|
| 38 |
-
"ok": True,
|
| 39 |
-
"model": MODEL,
|
| 40 |
-
"thinking_level": THINKING_LEVEL,
|
| 41 |
-
"memory_messages": len(HISTORY),
|
| 42 |
-
"max_messages": MAX_MESSAGES,
|
| 43 |
-
})
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
@app.post("/v1/chat")
|
| 47 |
-
def chat():
|
| 48 |
-
t0 = time.time()
|
| 49 |
-
ip = _client_ip()
|
| 50 |
-
|
| 51 |
-
data = request.get_json(silent=True) or {}
|
| 52 |
-
user_text = (data.get("text") or "").strip()
|
| 53 |
-
|
| 54 |
-
print(f"[/v1/chat] START {time.strftime('%Y-%m-%d %H:%M:%S')} ip={ip} mem_before={len(HISTORY)}/{MAX_MESSAGES}")
|
| 55 |
-
|
| 56 |
-
if not user_text:
|
| 57 |
-
print(f"[/v1/chat] ERROR missing text ip={ip}")
|
| 58 |
-
return jsonify({"error": "Missing 'text'"}), 400
|
| 59 |
-
|
| 60 |
-
print(f"[/v1/chat] user_text_len={len(user_text)} user_text={user_text!r}")
|
| 61 |
-
|
| 62 |
-
# Add user message to memory
|
| 63 |
-
HISTORY.append(
|
| 64 |
-
types.Content(
|
| 65 |
-
role="user",
|
| 66 |
-
parts=[types.Part.from_text(text=user_text)],
|
| 67 |
-
)
|
| 68 |
-
)
|
| 69 |
-
print(f"[/v1/chat] appended user msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
|
| 70 |
-
|
| 71 |
-
config = types.GenerateContentConfig(
|
| 72 |
system_instruction=[types.Part.from_text(text=SYSTEM_PROMPT)],
|
| 73 |
thinking_config=types.ThinkingConfig(thinking_level=THINKING_LEVEL),
|
| 74 |
safety_settings=[
|
|
@@ -91,25 +59,85 @@ def chat():
|
|
| 91 |
],
|
| 92 |
)
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
try:
|
| 95 |
resp = client.models.generate_content(
|
| 96 |
model=MODEL,
|
| 97 |
-
contents=list(HISTORY),
|
| 98 |
-
config=
|
| 99 |
)
|
| 100 |
reply_text = (resp.text or "").strip()
|
| 101 |
-
print(f"[/v1/chat] gemini_ok reply_len={len(reply_text)}")
|
| 102 |
-
print(f"[/v1/chat] bot_reply={reply_text!r}")
|
| 103 |
-
|
| 104 |
|
| 105 |
# Add assistant message to memory
|
| 106 |
HISTORY.append(
|
| 107 |
types.Content(
|
| 108 |
-
role="model",
|
| 109 |
parts=[types.Part.from_text(text=reply_text)],
|
| 110 |
)
|
| 111 |
)
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
dt_ms = int((time.time() - t0) * 1000)
|
| 115 |
print(f"[/v1/chat] END ip={ip} total_ms={dt_ms}")
|
|
@@ -123,17 +151,21 @@ def chat():
|
|
| 123 |
})
|
| 124 |
|
| 125 |
except Exception as e:
|
|
|
|
| 126 |
print("Gemini error:", repr(e))
|
|
|
|
|
|
|
| 127 |
|
| 128 |
-
# Roll back last user message on failure
|
| 129 |
-
if len(HISTORY) > 0 and getattr(HISTORY[-1], "role", None) == "user":
|
| 130 |
-
HISTORY.pop()
|
| 131 |
-
print(f"[/v1/chat] rollback user msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
|
| 132 |
-
|
| 133 |
-
dt_ms = int((time.time() - t0) * 1000)
|
| 134 |
-
print(f"[/v1/chat] FAIL ip={ip} total_ms={dt_ms}")
|
| 135 |
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
@app.post("/v1/reset")
|
|
@@ -144,6 +176,9 @@ def reset():
|
|
| 144 |
return jsonify({"ok": True, "memory_messages": 0})
|
| 145 |
|
| 146 |
|
|
|
|
|
|
|
|
|
|
| 147 |
if __name__ == "__main__":
|
| 148 |
port = int(os.environ.get("PORT", "7860"))
|
| 149 |
print(f"[startup] model={MODEL} thinking_level={THINKING_LEVEL} max_messages={MAX_MESSAGES} port={port}")
|
|
|
|
| 9 |
|
| 10 |
app = Flask(__name__)
|
| 11 |
|
| 12 |
+
# -------------------------
|
| 13 |
# Config
|
| 14 |
+
# -------------------------
|
| 15 |
MODEL = os.environ.get("GEMINI_MODEL", "gemini-3-flash-preview")
|
| 16 |
THINKING_LEVEL = os.environ.get("GEMINI_THINKING_LEVEL", "HIGH")
|
| 17 |
|
|
|
|
| 33 |
return request.headers.get("x-forwarded-for", request.remote_addr or "unknown")
|
| 34 |
|
| 35 |
|
| 36 |
+
def _gemini_config() -> types.GenerateContentConfig:
|
| 37 |
+
# NOTE: Setting thresholds to OFF is permissive and may not be honored for all content;
|
| 38 |
+
# some protections are not adjustable.
|
| 39 |
+
return types.GenerateContentConfig(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
system_instruction=[types.Part.from_text(text=SYSTEM_PROMPT)],
|
| 41 |
thinking_config=types.ThinkingConfig(thinking_level=THINKING_LEVEL),
|
| 42 |
safety_settings=[
|
|
|
|
| 59 |
],
|
| 60 |
)
|
| 61 |
|
| 62 |
+
|
| 63 |
+
def llm_chat(user_text: str) -> str:
    """Run one chat turn against Gemini using the shared conversation memory.

    Appends the user message to the module-level HISTORY, calls the Gemini
    API with the full history, appends the model reply on success, and
    returns the reply text.

    Args:
        user_text: Raw user input; stripped before use.

    Returns:
        The model's reply text (stripped; may be empty if the model
        returned no text).

    Raises:
        ValueError: If user_text is empty or whitespace-only.
        Exception: Any error from the Gemini client is re-raised unchanged
            after the just-appended user message has been rolled back, so a
            failed call leaves HISTORY exactly as it was.
    """
    user_text = (user_text or "").strip()
    if not user_text:
        raise ValueError("Missing 'text'")

    # Add user message to memory
    HISTORY.append(
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=user_text)],
        )
    )

    try:
        resp = client.models.generate_content(
            model=MODEL,
            # Pass a snapshot copy so the client never sees concurrent
            # mutations of the shared list.
            contents=list(HISTORY),
            config=_gemini_config(),
        )
        reply_text = (resp.text or "").strip()

        # Add assistant message to memory
        HISTORY.append(
            types.Content(
                role="model",
                parts=[types.Part.from_text(text=reply_text)],
            )
        )
        return reply_text
    except Exception:
        # Roll back the last user message on failure so the next turn does
        # not start with an orphaned, unanswered user entry.
        if HISTORY and getattr(HISTORY[-1], "role", None) == "user":
            HISTORY.pop()
        raise
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# -------------------------
|
| 106 |
+
# Endpoints
|
| 107 |
+
# -------------------------
|
| 108 |
+
@app.get("/health")
def health():
    """Liveness probe: report configured model settings and memory usage."""
    print(f"[/health] {time.strftime('%Y-%m-%d %H:%M:%S')} ip={_client_ip()} mem={len(HISTORY)}/{MAX_MESSAGES}")
    status = {
        "ok": True,
        "model": MODEL,
        "thinking_level": THINKING_LEVEL,
        "memory_messages": len(HISTORY),
        "max_messages": MAX_MESSAGES,
    }
    return jsonify(status)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
@app.post("/v1/chat")
|
| 121 |
+
def chat_text():
|
| 122 |
+
t0 = time.time()
|
| 123 |
+
ip = _client_ip()
|
| 124 |
+
|
| 125 |
+
data = request.get_json(silent=True) or {}
|
| 126 |
+
user_text = (data.get("text") or "").strip()
|
| 127 |
+
|
| 128 |
+
print(f"[/v1/chat] START {time.strftime('%Y-%m-%d %H:%M:%S')} ip={ip} mem_before={len(HISTORY)}/{MAX_MESSAGES}")
|
| 129 |
+
|
| 130 |
+
if not user_text:
|
| 131 |
+
print(f"[/v1/chat] ERROR missing text ip={ip}")
|
| 132 |
+
return jsonify({"error": "Missing 'text'"}), 400
|
| 133 |
+
|
| 134 |
+
print(f"[/v1/chat] user_text_len={len(user_text)} user_text={user_text!r}")
|
| 135 |
+
|
| 136 |
+
try:
|
| 137 |
+
reply_text = llm_chat(user_text)
|
| 138 |
+
print(f"[/v1/chat] gemini_ok reply_len={len(reply_text)}")
|
| 139 |
+
print(f"[/v1/chat] bot_reply={reply_text!r}")
|
| 140 |
+
print(f"[/v1/chat] mem_now={len(HISTORY)}/{MAX_MESSAGES}")
|
| 141 |
|
| 142 |
dt_ms = int((time.time() - t0) * 1000)
|
| 143 |
print(f"[/v1/chat] END ip={ip} total_ms={dt_ms}")
|
|
|
|
| 151 |
})
|
| 152 |
|
| 153 |
except Exception as e:
|
| 154 |
+
dt_ms = int((time.time() - t0) * 1000)
|
| 155 |
print("Gemini error:", repr(e))
|
| 156 |
+
print(f"[/v1/chat] FAIL ip={ip} total_ms={dt_ms} mem_now={len(HISTORY)}/{MAX_MESSAGES}")
|
| 157 |
+
return jsonify({"error": "Gemini call failed"}), 500
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
+
@app.post("/v1/utterance")
def chat_audio():
    """
    Audio endpoint (placeholder for now).
    Later: accept audio (multipart/form-data), run STT -> llm_chat -> TTS -> return audio.
    """
    print(f"[/v1/utterance] HIT {time.strftime('%Y-%m-%d %H:%M:%S')} ip={_client_ip()} (not implemented)")
    return jsonify({"error": "Not implemented yet"}), 501
|
| 169 |
|
| 170 |
|
| 171 |
@app.post("/v1/reset")
|
|
|
|
| 176 |
return jsonify({"ok": True, "memory_messages": 0})
|
| 177 |
|
| 178 |
|
| 179 |
+
# -------------------------
|
| 180 |
+
# Startup
|
| 181 |
+
# -------------------------
|
| 182 |
if __name__ == "__main__":
|
| 183 |
port = int(os.environ.get("PORT", "7860"))
|
| 184 |
print(f"[startup] model={MODEL} thinking_level={THINKING_LEVEL} max_messages={MAX_MESSAGES} port={port}")
|