tonyassi committed on
Commit
0f1a405
·
verified ·
1 Parent(s): 519b971

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from collections import deque
3
  from flask import Flask, request, jsonify
4
  from waitress import serve
@@ -24,8 +25,15 @@ client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
24
  MAX_MESSAGES = 20 # user+assistant messages combined
25
  HISTORY = deque(maxlen=MAX_MESSAGES) # holds types.Content objects
26
 
 
 
 
 
 
 
27
  @app.get("/health")
28
  def health():
 
29
  return jsonify({
30
  "ok": True,
31
  "model": MODEL,
@@ -34,13 +42,23 @@ def health():
34
  "max_messages": MAX_MESSAGES,
35
  })
36
 
 
37
  @app.post("/v1/chat")
38
  def chat():
 
 
 
39
  data = request.get_json(silent=True) or {}
40
  user_text = (data.get("text") or "").strip()
 
 
 
41
  if not user_text:
 
42
  return jsonify({"error": "Missing 'text'"}), 400
43
 
 
 
44
  # Add user message to memory
45
  HISTORY.append(
46
  types.Content(
@@ -48,6 +66,7 @@ def chat():
48
  parts=[types.Part.from_text(text=user_text)],
49
  )
50
  )
 
51
 
52
  config = types.GenerateContentConfig(
53
  system_instruction=[types.Part.from_text(text=SYSTEM_PROMPT)],
@@ -61,6 +80,7 @@ def chat():
61
  config=config,
62
  )
63
  reply_text = (resp.text or "").strip()
 
64
 
65
  # Add assistant message to memory
66
  HISTORY.append(
@@ -69,12 +89,17 @@ def chat():
69
  parts=[types.Part.from_text(text=reply_text)],
70
  )
71
  )
 
 
 
 
72
 
73
  return jsonify({
74
  "input": user_text,
75
  "reply_text": reply_text,
76
  "model": MODEL,
77
  "memory_messages": len(HISTORY),
 
78
  })
79
 
80
  except Exception as e:
@@ -83,14 +108,23 @@ def chat():
83
  # Roll back last user message on failure
84
  if len(HISTORY) > 0 and getattr(HISTORY[-1], "role", None) == "user":
85
  HISTORY.pop()
 
 
 
 
86
 
87
  return jsonify({"error": "Gemini call failed"}), 500
88
 
 
89
  @app.post("/v1/reset")
90
  def reset():
 
 
91
  HISTORY.clear()
92
  return jsonify({"ok": True, "memory_messages": 0})
93
 
 
94
  if __name__ == "__main__":
95
  port = int(os.environ.get("PORT", "7860"))
 
96
  serve(app, host="0.0.0.0", port=port)
 
1
  import os
2
+ import time
3
  from collections import deque
4
  from flask import Flask, request, jsonify
5
  from waitress import serve
 
25
  MAX_MESSAGES = 20 # user+assistant messages combined
26
  HISTORY = deque(maxlen=MAX_MESSAGES) # holds types.Content objects
27
 
28
+
29
def _client_ip() -> str:
    """Best-effort client IP for log lines.

    HF may proxy requests, so the original client address arrives in the
    ``X-Forwarded-For`` header. That header can be a comma-separated chain
    ("client, proxy1, proxy2, ..."); the first entry is the originating
    client, so return that rather than the raw header. Falls back to the
    socket peer address, then to "unknown".
    """
    forwarded = request.headers.get("x-forwarded-for")
    if forwarded:
        # First entry of the chain is the original client (best-effort;
        # the header is client-suppliable and not authenticated).
        return forwarded.split(",")[0].strip()
    return request.remote_addr or "unknown"
32
+
33
+
34
  @app.get("/health")
35
  def health():
36
+ print(f"[/health] {time.strftime('%Y-%m-%d %H:%M:%S')} ip={_client_ip()} mem={len(HISTORY)}/{MAX_MESSAGES}")
37
  return jsonify({
38
  "ok": True,
39
  "model": MODEL,
 
42
  "max_messages": MAX_MESSAGES,
43
  })
44
 
45
+
46
  @app.post("/v1/chat")
47
  def chat():
48
+ t0 = time.time()
49
+ ip = _client_ip()
50
+
51
  data = request.get_json(silent=True) or {}
52
  user_text = (data.get("text") or "").strip()
53
+
54
+ print(f"[/v1/chat] START {time.strftime('%Y-%m-%d %H:%M:%S')} ip={ip} mem_before={len(HISTORY)}/{MAX_MESSAGES}")
55
+
56
  if not user_text:
57
+ print(f"[/v1/chat] ERROR missing text ip={ip}")
58
  return jsonify({"error": "Missing 'text'"}), 400
59
 
60
+ print(f"[/v1/chat] user_text_len={len(user_text)} user_text={user_text!r}")
61
+
62
  # Add user message to memory
63
  HISTORY.append(
64
  types.Content(
 
66
  parts=[types.Part.from_text(text=user_text)],
67
  )
68
  )
69
+ print(f"[/v1/chat] appended user msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
70
 
71
  config = types.GenerateContentConfig(
72
  system_instruction=[types.Part.from_text(text=SYSTEM_PROMPT)],
 
80
  config=config,
81
  )
82
  reply_text = (resp.text or "").strip()
83
+ print(f"[/v1/chat] gemini_ok reply_len={len(reply_text)}")
84
 
85
  # Add assistant message to memory
86
  HISTORY.append(
 
89
  parts=[types.Part.from_text(text=reply_text)],
90
  )
91
  )
92
+ print(f"[/v1/chat] appended model msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
93
+
94
+ dt_ms = int((time.time() - t0) * 1000)
95
+ print(f"[/v1/chat] END ip={ip} total_ms={dt_ms}")
96
 
97
  return jsonify({
98
  "input": user_text,
99
  "reply_text": reply_text,
100
  "model": MODEL,
101
  "memory_messages": len(HISTORY),
102
+ "total_ms": dt_ms,
103
  })
104
 
105
  except Exception as e:
 
108
  # Roll back last user message on failure
109
  if len(HISTORY) > 0 and getattr(HISTORY[-1], "role", None) == "user":
110
  HISTORY.pop()
111
+ print(f"[/v1/chat] rollback user msg -> mem_now={len(HISTORY)}/{MAX_MESSAGES}")
112
+
113
+ dt_ms = int((time.time() - t0) * 1000)
114
+ print(f"[/v1/chat] FAIL ip={ip} total_ms={dt_ms}")
115
 
116
  return jsonify({"error": "Gemini call failed"}), 500
117
 
118
+
119
@app.post("/v1/reset")
def reset():
    """Wipe the conversation memory and acknowledge with a zero count."""
    caller = _client_ip()
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    before = len(HISTORY)
    # Log who asked for the reset and how full memory was beforehand.
    print(f"[/v1/reset] {stamp} ip={caller} clearing mem (was {before}/{MAX_MESSAGES})")
    HISTORY.clear()
    return jsonify({"ok": True, "memory_messages": 0})
125
 
126
+
127
if __name__ == "__main__":
    # PORT env var overrides the default Spaces port; waitress serves Flask.
    port = int(os.environ.get("PORT", "7860"))
    banner = f"[startup] model={MODEL} thinking_level={THINKING_LEVEL} max_messages={MAX_MESSAGES} port={port}"
    print(banner)
    serve(app, host="0.0.0.0", port=port)