Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,9 @@ from google.genai import types
|
|
| 9 |
|
| 10 |
app = Flask(__name__)
|
| 11 |
|
|
|
|
| 12 |
# Config
|
|
|
|
| 13 |
MODEL = os.environ.get("GEMINI_MODEL", "gemini-3-flash-preview")
|
| 14 |
THINKING_LEVEL = os.environ.get("GEMINI_THINKING_LEVEL", "HIGH")
|
| 15 |
|
|
@@ -31,44 +33,10 @@ def _client_ip() -> str:
|
|
| 31 |
return request.headers.get("x-forwarded-for", request.remote_addr or "unknown")
|
| 32 |
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
return
|
| 38 |
-
"ok": True,
|
| 39 |
-
"model": MODEL,
|
| 40 |
-
"thinking_level": THINKING_LEVEL,
|
| 41 |
-
"memory_messages": len(HISTORY),
|
| 42 |
-
"max_messages": MAX_MESSAGES,
|
| 43 |
-
})
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
@app.post("/v1/chat")
|
| 47 |
-
def chat():
|
| 48 |
-
t0 = time.time()
|
| 49 |
-
ip = _client_ip()
|
| 50 |
-
|
| 51 |
-
data = request.get_json(silent=True) or {}
|
| 52 |
-
user_text = (data.get("text") or "").strip()
|
| 53 |
-
|
| 54 |
-
print(f"[/v1/chat] START {time.strftime('%Y-%m-%d %H:%M:%S')} ip={ip} mem_before={len(HISTORY)}/{MAX_MESSAGES}")
|
| 55 |
-
|
| 56 |
-
if not user_text:
|
| 57 |
-
print(f"[/v1/chat] ERROR missing text ip={ip}")
|
| 58 |
-
return jsonify({"error": "Missing 'text'"}), 400
|
| 59 |
-
|
| 60 |
-
print(f"[/v1/chat] user_text_len={len(user_text)} user_text={user_text!r}")
|
| 61 |
-
|
| 62 |
-
# Add user message to memory
|
| 63 |
-
HISTORY.append(
|
| 64 |
-
types.Content(
|
| 65 |
-
role="user",
|
| 66 |
-
parts=[types.Part.from_text(text=user_text)],
|
| 67 |
-
)
|
| 68 |
-
)
|
| 69 |
-
print(f"[/v1/chat] appended user msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
|
| 70 |
-
|
| 71 |
-
config = types.GenerateContentConfig(
|
| 72 |
system_instruction=[types.Part.from_text(text=SYSTEM_PROMPT)],
|
| 73 |
thinking_config=types.ThinkingConfig(thinking_level=THINKING_LEVEL),
|
| 74 |
safety_settings=[
|
|
@@ -91,25 +59,85 @@ def chat():
|
|
| 91 |
],
|
| 92 |
)
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
try:
|
| 95 |
resp = client.models.generate_content(
|
| 96 |
model=MODEL,
|
| 97 |
-
contents=list(HISTORY),
|
| 98 |
-
config=
|
| 99 |
)
|
| 100 |
reply_text = (resp.text or "").strip()
|
| 101 |
-
print(f"[/v1/chat] gemini_ok reply_len={len(reply_text)}")
|
| 102 |
-
print(f"[/v1/chat] bot_reply={reply_text!r}")
|
| 103 |
-
|
| 104 |
|
| 105 |
# Add assistant message to memory
|
| 106 |
HISTORY.append(
|
| 107 |
types.Content(
|
| 108 |
-
role="model",
|
| 109 |
parts=[types.Part.from_text(text=reply_text)],
|
| 110 |
)
|
| 111 |
)
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
dt_ms = int((time.time() - t0) * 1000)
|
| 115 |
print(f"[/v1/chat] END ip={ip} total_ms={dt_ms}")
|
|
@@ -123,17 +151,21 @@ def chat():
|
|
| 123 |
})
|
| 124 |
|
| 125 |
except Exception as e:
|
|
|
|
| 126 |
print("Gemini error:", repr(e))
|
|
|
|
|
|
|
| 127 |
|
| 128 |
-
# Roll back last user message on failure
|
| 129 |
-
if len(HISTORY) > 0 and getattr(HISTORY[-1], "role", None) == "user":
|
| 130 |
-
HISTORY.pop()
|
| 131 |
-
print(f"[/v1/chat] rollback user msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
|
| 132 |
-
|
| 133 |
-
dt_ms = int((time.time() - t0) * 1000)
|
| 134 |
-
print(f"[/v1/chat] FAIL ip={ip} total_ms={dt_ms}")
|
| 135 |
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
@app.post("/v1/reset")
|
|
@@ -144,6 +176,9 @@ def reset():
|
|
| 144 |
return jsonify({"ok": True, "memory_messages": 0})
|
| 145 |
|
| 146 |
|
|
|
|
|
|
|
|
|
|
| 147 |
if __name__ == "__main__":
|
| 148 |
port = int(os.environ.get("PORT", "7860"))
|
| 149 |
print(f"[startup] model={MODEL} thinking_level={THINKING_LEVEL} max_messages={MAX_MESSAGES} port={port}")
|
|
|
|
| 9 |
|
| 10 |
app = Flask(__name__)
|
| 11 |
|
| 12 |
+
# -------------------------
|
| 13 |
# Config
|
| 14 |
+
# -------------------------
|
| 15 |
MODEL = os.environ.get("GEMINI_MODEL", "gemini-3-flash-preview")
|
| 16 |
THINKING_LEVEL = os.environ.get("GEMINI_THINKING_LEVEL", "HIGH")
|
| 17 |
|
|
|
|
| 33 |
return request.headers.get("x-forwarded-for", request.remote_addr or "unknown")
|
| 34 |
|
| 35 |
|
| 36 |
+
def _gemini_config() -> types.GenerateContentConfig:
|
| 37 |
+
# NOTE: Setting thresholds to OFF is permissive and may not be honored for all content;
|
| 38 |
+
# some protections are not adjustable.
|
| 39 |
+
return types.GenerateContentConfig(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
system_instruction=[types.Part.from_text(text=SYSTEM_PROMPT)],
|
| 41 |
thinking_config=types.ThinkingConfig(thinking_level=THINKING_LEVEL),
|
| 42 |
safety_settings=[
|
|
|
|
| 59 |
],
|
| 60 |
)
|
| 61 |
|
| 62 |
+
|
| 63 |
+
def llm_chat(user_text: str) -> str:
    """Run one chat turn against Gemini using the shared conversation memory.

    Appends the user message to the module-level HISTORY, calls the Gemini
    API with the full history, appends the model reply on success, and
    returns the reply text.

    Args:
        user_text: Raw user input; stripped before use.

    Returns:
        The model's reply text (stripped; may be empty if the model
        returned no text).

    Raises:
        ValueError: If user_text is empty or whitespace-only.
        Exception: Any error from the Gemini client is re-raised unchanged
            after the just-appended user message has been rolled back, so a
            failed call leaves HISTORY exactly as it was.
    """
    user_text = (user_text or "").strip()
    if not user_text:
        raise ValueError("Missing 'text'")

    # Add user message to memory
    HISTORY.append(
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=user_text)],
        )
    )

    try:
        resp = client.models.generate_content(
            model=MODEL,
            # Pass a snapshot copy so the client never sees concurrent
            # mutations of the shared list.
            contents=list(HISTORY),
            config=_gemini_config(),
        )
        reply_text = (resp.text or "").strip()

        # Add assistant message to memory
        HISTORY.append(
            types.Content(
                role="model",
                parts=[types.Part.from_text(text=reply_text)],
            )
        )
        return reply_text
    except Exception:
        # Roll back the last user message on failure so the next turn does
        # not start with an orphaned, unanswered user entry.
        if HISTORY and getattr(HISTORY[-1], "role", None) == "user":
            HISTORY.pop()
        raise
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# -------------------------
|
| 106 |
+
# Endpoints
|
| 107 |
+
# -------------------------
|
| 108 |
+
@app.get("/health")
def health():
    """Liveness probe: report configured model settings and memory usage."""
    print(f"[/health] {time.strftime('%Y-%m-%d %H:%M:%S')} ip={_client_ip()} mem={len(HISTORY)}/{MAX_MESSAGES}")
    status = {
        "ok": True,
        "model": MODEL,
        "thinking_level": THINKING_LEVEL,
        "memory_messages": len(HISTORY),
        "max_messages": MAX_MESSAGES,
    }
    return jsonify(status)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
@app.post("/v1/chat")
|
| 121 |
+
def chat_text():
|
| 122 |
+
t0 = time.time()
|
| 123 |
+
ip = _client_ip()
|
| 124 |
+
|
| 125 |
+
data = request.get_json(silent=True) or {}
|
| 126 |
+
user_text = (data.get("text") or "").strip()
|
| 127 |
+
|
| 128 |
+
print(f"[/v1/chat] START {time.strftime('%Y-%m-%d %H:%M:%S')} ip={ip} mem_before={len(HISTORY)}/{MAX_MESSAGES}")
|
| 129 |
+
|
| 130 |
+
if not user_text:
|
| 131 |
+
print(f"[/v1/chat] ERROR missing text ip={ip}")
|
| 132 |
+
return jsonify({"error": "Missing 'text'"}), 400
|
| 133 |
+
|
| 134 |
+
print(f"[/v1/chat] user_text_len={len(user_text)} user_text={user_text!r}")
|
| 135 |
+
|
| 136 |
+
try:
|
| 137 |
+
reply_text = llm_chat(user_text)
|
| 138 |
+
print(f"[/v1/chat] gemini_ok reply_len={len(reply_text)}")
|
| 139 |
+
print(f"[/v1/chat] bot_reply={reply_text!r}")
|
| 140 |
+
print(f"[/v1/chat] mem_now={len(HISTORY)}/{MAX_MESSAGES}")
|
| 141 |
|
| 142 |
dt_ms = int((time.time() - t0) * 1000)
|
| 143 |
print(f"[/v1/chat] END ip={ip} total_ms={dt_ms}")
|
|
|
|
| 151 |
})
|
| 152 |
|
| 153 |
except Exception as e:
|
| 154 |
+
dt_ms = int((time.time() - t0) * 1000)
|
| 155 |
print("Gemini error:", repr(e))
|
| 156 |
+
print(f"[/v1/chat] FAIL ip={ip} total_ms={dt_ms} mem_now={len(HISTORY)}/{MAX_MESSAGES}")
|
| 157 |
+
return jsonify({"error": "Gemini call failed"}), 500
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
+
@app.post("/v1/utterance")
def chat_audio():
    """
    Audio endpoint (placeholder for now).
    Later: accept audio (multipart/form-data), run STT -> llm_chat -> TTS -> return audio.
    """
    print(f"[/v1/utterance] HIT {time.strftime('%Y-%m-%d %H:%M:%S')} ip={_client_ip()} (not implemented)")
    return jsonify({"error": "Not implemented yet"}), 501
|
| 169 |
|
| 170 |
|
| 171 |
@app.post("/v1/reset")
|
|
|
|
| 176 |
return jsonify({"ok": True, "memory_messages": 0})
|
| 177 |
|
| 178 |
|
| 179 |
+
# -------------------------
|
| 180 |
+
# Startup
|
| 181 |
+
# -------------------------
|
| 182 |
if __name__ == "__main__":
|
| 183 |
port = int(os.environ.get("PORT", "7860"))
|
| 184 |
print(f"[startup] model={MODEL} thinking_level={THINKING_LEVEL} max_messages={MAX_MESSAGES} port={port}")
|