nikhmr1235 committed on
Commit
2b5cccc
·
verified ·
1 Parent(s): f5dabf5

+ self-cleaning logic

Browse files

The application now automatically clears out all old database directories every time it starts up, and it
continues to clean up any session directories that have been idle for more than three hours while it is running. This
makes the application much more robust and suitable for a shared environment like Hugging Face Spaces.

Files changed (1) hide show
  1. app.py +43 -1
app.py CHANGED
@@ -10,6 +10,49 @@ from langchain.prompts import PromptTemplate
10
  from langchain_core.runnables import RunnablePassthrough
11
  from langchain_core.output_parsers import StrOutputParser
12
  import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # Set the Google API key from environment variables
15
  if "GOOGLE_API_KEY" not in os.environ:
@@ -20,7 +63,6 @@ google_api_key = os.environ.get("GOOGLE_API_KEY")
20
  # Constants
21
  LLM_MODEL = "gemini-1.5-flash"
22
  EMBEDDING_MODEL = "models/embedding-001"
23
- CHROMA_DB_PATH = tempfile.gettempdir() + "/chroma_db"
24
 
25
  class SessionState:
26
  def __init__(self):
 
10
  from langchain_core.runnables import RunnablePassthrough
11
  from langchain_core.output_parsers import StrOutputParser
12
  import tempfile
13
+ import time
14
+ import threading
15
+
16
+ # --- Cleanup Configuration ---
17
+ CHROMA_DB_PATH = os.path.join(tempfile.gettempdir(), "chroma_db")
18
+ CLEANUP_INTERVAL_HOURS = 3 # Cleanup every 3 hours
19
+ SESSION_TTL_HOURS = 3 # Sessions older than 3 hours will be deleted
20
+
21
+ # --- Cleanup Functions ---
22
def _remove_expired_sessions(base_path, ttl_seconds, now):
    """Run one cleanup pass over the session directories in ``base_path``.

    Every sub-directory whose modification time is more than ``ttl_seconds``
    older than ``now`` is removed with ``shutil.rmtree``.

    Args:
        base_path: Directory that holds one sub-directory per session.
        ttl_seconds: Maximum allowed age of a session directory, in seconds.
        now: Reference timestamp (seconds since the epoch) for the age check.
    """
    # EAFP: listing directly avoids the exists()/listdir() race, and catching
    # OSError here keeps a transient filesystem error from killing the
    # background thread that drives this function.
    try:
        session_ids = os.listdir(base_path)
    except FileNotFoundError:
        # Nothing to clean up yet; the caller simply retries after sleeping.
        return
    except OSError as e:
        print(f"Error listing session directory {base_path}: {e}")
        return

    for session_id in session_ids:
        session_path = os.path.join(base_path, session_id)
        if not os.path.isdir(session_path):
            continue
        # Broad catch on purpose: this runs at the top of a daemon thread, so
        # one bad session directory must not stop the cleanup of the others.
        try:
            # NOTE(review): a directory's mtime normally changes only when
            # entries directly inside it are added/removed/renamed -- confirm
            # this is a good enough "idle" signal for the session store.
            mod_time = os.path.getmtime(session_path)
            if (now - mod_time) > ttl_seconds:
                print(f"Cleaning up old session: {session_id}")
                shutil.rmtree(session_path)
        except Exception as e:
            print(f"Error cleaning up session {session_id}: {e}")


def cleanup_old_sessions():
    """Deletes session directories older than SESSION_TTL_HOURS.

    Runs forever: performs one cleanup pass over CHROMA_DB_PATH, then sleeps
    for CLEANUP_INTERVAL_HOURS before the next pass. Intended to be used as
    the target of a daemon thread; it never returns.
    """
    while True:
        _remove_expired_sessions(
            CHROMA_DB_PATH,
            SESSION_TTL_HOURS * 3600,
            time.time(),
        )
        time.sleep(CLEANUP_INTERVAL_HOURS * 3600)
44
+
45
# --- Initial Cleanup on Startup ---
# Wipe whatever a previous run left behind. This is best-effort: a failure to
# delete (e.g. a permission error in a shared temp directory) must not stop
# the application from starting.
print("Performing initial cleanup of old ChromaDB directories...")
try:
    shutil.rmtree(CHROMA_DB_PATH)
except FileNotFoundError:
    # First run, or the directory was already removed: nothing to clean.
    pass
except OSError as e:
    print(f"Warning: could not fully remove {CHROMA_DB_PATH}: {e}")
# exist_ok=True so a partially removed directory does not raise here.
os.makedirs(CHROMA_DB_PATH, exist_ok=True)
print("Cleanup complete. Starting background cleanup thread.")

# --- Start Background Cleanup Thread ---
# daemon=True so the never-ending cleanup loop does not block interpreter exit.
cleanup_thread = threading.Thread(target=cleanup_old_sessions, daemon=True)
cleanup_thread.start()
55
+
56
 
57
  # Set the Google API key from environment variables
58
  if "GOOGLE_API_KEY" not in os.environ:
 
63
  # Constants
64
  LLM_MODEL = "gemini-1.5-flash"
65
  EMBEDDING_MODEL = "models/embedding-001"
 
66
 
67
  class SessionState:
68
  def __init__(self):