tonyassi committed on
Commit
fbe398e
·
verified ·
1 Parent(s): 693d7dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -53
app.py CHANGED
@@ -9,7 +9,9 @@ from google.genai import types
9
 
10
  app = Flask(__name__)
11
 
 
12
  # Config
 
13
  MODEL = os.environ.get("GEMINI_MODEL", "gemini-3-flash-preview")
14
  THINKING_LEVEL = os.environ.get("GEMINI_THINKING_LEVEL", "HIGH")
15
 
@@ -31,44 +33,10 @@ def _client_ip() -> str:
31
  return request.headers.get("x-forwarded-for", request.remote_addr or "unknown")
32
 
33
 
34
- @app.get("/health")
35
- def health():
36
- print(f"[/health] {time.strftime('%Y-%m-%d %H:%M:%S')} ip={_client_ip()} mem={len(HISTORY)}/{MAX_MESSAGES}")
37
- return jsonify({
38
- "ok": True,
39
- "model": MODEL,
40
- "thinking_level": THINKING_LEVEL,
41
- "memory_messages": len(HISTORY),
42
- "max_messages": MAX_MESSAGES,
43
- })
44
-
45
-
46
- @app.post("/v1/chat")
47
- def chat():
48
- t0 = time.time()
49
- ip = _client_ip()
50
-
51
- data = request.get_json(silent=True) or {}
52
- user_text = (data.get("text") or "").strip()
53
-
54
- print(f"[/v1/chat] START {time.strftime('%Y-%m-%d %H:%M:%S')} ip={ip} mem_before={len(HISTORY)}/{MAX_MESSAGES}")
55
-
56
- if not user_text:
57
- print(f"[/v1/chat] ERROR missing text ip={ip}")
58
- return jsonify({"error": "Missing 'text'"}), 400
59
-
60
- print(f"[/v1/chat] user_text_len={len(user_text)} user_text={user_text!r}")
61
-
62
- # Add user message to memory
63
- HISTORY.append(
64
- types.Content(
65
- role="user",
66
- parts=[types.Part.from_text(text=user_text)],
67
- )
68
- )
69
- print(f"[/v1/chat] appended user msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
70
-
71
- config = types.GenerateContentConfig(
72
  system_instruction=[types.Part.from_text(text=SYSTEM_PROMPT)],
73
  thinking_config=types.ThinkingConfig(thinking_level=THINKING_LEVEL),
74
  safety_settings=[
@@ -91,25 +59,85 @@ def chat():
91
  ],
92
  )
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  try:
95
  resp = client.models.generate_content(
96
  model=MODEL,
97
- contents=list(HISTORY), # send full rolling memory
98
- config=config,
99
  )
100
  reply_text = (resp.text or "").strip()
101
- print(f"[/v1/chat] gemini_ok reply_len={len(reply_text)}")
102
- print(f"[/v1/chat] bot_reply={reply_text!r}")
103
-
104
 
105
  # Add assistant message to memory
106
  HISTORY.append(
107
  types.Content(
108
- role="model", # assistant role for Gemini contents
109
  parts=[types.Part.from_text(text=reply_text)],
110
  )
111
  )
112
- print(f"[/v1/chat] appended model msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  dt_ms = int((time.time() - t0) * 1000)
115
  print(f"[/v1/chat] END ip={ip} total_ms={dt_ms}")
@@ -123,17 +151,21 @@ def chat():
123
  })
124
 
125
  except Exception as e:
 
126
  print("Gemini error:", repr(e))
 
 
127
 
128
- # Roll back last user message on failure
129
- if len(HISTORY) > 0 and getattr(HISTORY[-1], "role", None) == "user":
130
- HISTORY.pop()
131
- print(f"[/v1/chat] rollback user msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
132
-
133
- dt_ms = int((time.time() - t0) * 1000)
134
- print(f"[/v1/chat] FAIL ip={ip} total_ms={dt_ms}")
135
 
136
- return jsonify({"error": "Gemini call failed"}), 500
 
 
 
 
 
 
 
 
137
 
138
 
139
  @app.post("/v1/reset")
@@ -144,6 +176,9 @@ def reset():
144
  return jsonify({"ok": True, "memory_messages": 0})
145
 
146
 
 
 
 
147
  if __name__ == "__main__":
148
  port = int(os.environ.get("PORT", "7860"))
149
  print(f"[startup] model={MODEL} thinking_level={THINKING_LEVEL} max_messages={MAX_MESSAGES} port={port}")
 
9
 
10
  app = Flask(__name__)
11
 
12
+ # -------------------------
13
  # Config
14
+ # -------------------------
15
  MODEL = os.environ.get("GEMINI_MODEL", "gemini-3-flash-preview")
16
  THINKING_LEVEL = os.environ.get("GEMINI_THINKING_LEVEL", "HIGH")
17
 
 
33
  return request.headers.get("x-forwarded-for", request.remote_addr or "unknown")
34
 
35
 
36
+ def _gemini_config() -> types.GenerateContentConfig:
37
+ # NOTE: Setting thresholds to OFF is permissive and may not be honored for all content;
38
+ # some protections are not adjustable.
39
+ return types.GenerateContentConfig(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  system_instruction=[types.Part.from_text(text=SYSTEM_PROMPT)],
41
  thinking_config=types.ThinkingConfig(thinking_level=THINKING_LEVEL),
42
  safety_settings=[
 
59
  ],
60
  )
61
 
62
+
63
def llm_chat(user_text: str) -> str:
    """
    Send one user turn to Gemini and return the model's reply text.

    The user turn is appended to the global HISTORY before the call and the
    model reply is appended once the call succeeds; a copy of HISTORY is what
    gets sent as the request contents.

    Args:
        user_text: Raw user input; surrounding whitespace is stripped.

    Returns:
        The stripped reply text ("" when the response carries no text).

    Raises:
        ValueError: If user_text is empty or whitespace-only.
        Exception: Any error raised while calling Gemini or recording the
            reply is re-raised after this call's user turn is rolled back.
    """
    user_text = (user_text or "").strip()
    if not user_text:
        raise ValueError("Missing 'text'")

    # Keep a handle on the exact object we append so a rollback can verify it
    # is removing this call's message rather than any trailing "user" entry.
    user_msg = types.Content(
        role="user",
        parts=[types.Part.from_text(text=user_text)],
    )
    HISTORY.append(user_msg)

    try:
        resp = client.models.generate_content(
            model=MODEL,
            contents=list(HISTORY),  # snapshot of the memory, including the new user turn
            config=_gemini_config(),
        )
        reply_text = (resp.text or "").strip()

        # Add assistant message to memory
        HISTORY.append(
            types.Content(
                role="model",
                parts=[types.Part.from_text(text=reply_text)],
            )
        )

        return reply_text

    except Exception:
        # Roll back only the user message added by this call; if something
        # else is now at the tail, leave HISTORY untouched.
        if HISTORY and HISTORY[-1] is user_msg:
            HISTORY.pop()
        raise
103
+
104
+
105
+ # -------------------------
106
+ # Endpoints
107
+ # -------------------------
108
@app.get("/health")
def health():
    """Log the hit and return a JSON status with model and memory settings."""
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print(f"[/health] {stamp} ip={_client_ip()} mem={len(HISTORY)}/{MAX_MESSAGES}")

    # Assemble the status payload separately from the response call.
    status = {
        "ok": True,
        "model": MODEL,
        "thinking_level": THINKING_LEVEL,
        "memory_messages": len(HISTORY),
        "max_messages": MAX_MESSAGES,
    }
    return jsonify(status)
118
+
119
+
120
+ @app.post("/v1/chat")
121
+ def chat_text():
122
+ t0 = time.time()
123
+ ip = _client_ip()
124
+
125
+ data = request.get_json(silent=True) or {}
126
+ user_text = (data.get("text") or "").strip()
127
+
128
+ print(f"[/v1/chat] START {time.strftime('%Y-%m-%d %H:%M:%S')} ip={ip} mem_before={len(HISTORY)}/{MAX_MESSAGES}")
129
+
130
+ if not user_text:
131
+ print(f"[/v1/chat] ERROR missing text ip={ip}")
132
+ return jsonify({"error": "Missing 'text'"}), 400
133
+
134
+ print(f"[/v1/chat] user_text_len={len(user_text)} user_text={user_text!r}")
135
+
136
+ try:
137
+ reply_text = llm_chat(user_text)
138
+ print(f"[/v1/chat] gemini_ok reply_len={len(reply_text)}")
139
+ print(f"[/v1/chat] bot_reply={reply_text!r}")
140
+ print(f"[/v1/chat] mem_now={len(HISTORY)}/{MAX_MESSAGES}")
141
 
142
  dt_ms = int((time.time() - t0) * 1000)
143
  print(f"[/v1/chat] END ip={ip} total_ms={dt_ms}")
 
151
  })
152
 
153
  except Exception as e:
154
+ dt_ms = int((time.time() - t0) * 1000)
155
  print("Gemini error:", repr(e))
156
+ print(f"[/v1/chat] FAIL ip={ip} total_ms={dt_ms} mem_now={len(HISTORY)}/{MAX_MESSAGES}")
157
+ return jsonify({"error": "Gemini call failed"}), 500
158
 
 
 
 
 
 
 
 
159
 
160
@app.post("/v1/utterance")
def chat_audio():
    """
    Audio endpoint stub: logs the request and always answers HTTP 501.

    Planned later: accept audio (multipart/form-data), run
    STT -> llm_chat -> TTS, and return audio.
    """
    caller = _client_ip()
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print(f"[/v1/utterance] HIT {stamp} ip={caller} (not implemented)")

    body = jsonify({"error": "Not implemented yet"})
    return body, 501
169
 
170
 
171
  @app.post("/v1/reset")
 
176
  return jsonify({"ok": True, "memory_messages": 0})
177
 
178
 
179
+ # -------------------------
180
+ # Startup
181
+ # -------------------------
182
  if __name__ == "__main__":
183
  port = int(os.environ.get("PORT", "7860"))
184
  print(f"[startup] model={MODEL} thinking_level={THINKING_LEVEL} max_messages={MAX_MESSAGES} port={port}")