Spaces:
Sleeping
Sleeping
+ self-cleaning logic
Browse files
The application will automatically clear out all old database directories every time it starts up, and it will
continue to clean up any session directories that have been idle for more than three hours while it's running. This
makes the application much more robust and suitable for a shared environment like Hugging Face Spaces.
app.py
CHANGED
|
@@ -10,6 +10,49 @@ from langchain.prompts import PromptTemplate
|
|
| 10 |
from langchain_core.runnables import RunnablePassthrough
|
| 11 |
from langchain_core.output_parsers import StrOutputParser
|
| 12 |
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Set the Google API key from environment variables
|
| 15 |
if "GOOGLE_API_KEY" not in os.environ:
|
|
@@ -20,7 +63,6 @@ google_api_key = os.environ.get("GOOGLE_API_KEY")
|
|
| 20 |
# Constants
|
| 21 |
LLM_MODEL = "gemini-1.5-flash"
|
| 22 |
EMBEDDING_MODEL = "models/embedding-001"
|
| 23 |
-
CHROMA_DB_PATH = tempfile.gettempdir() + "/chroma_db"
|
| 24 |
|
| 25 |
class SessionState:
|
| 26 |
def __init__(self):
|
|
|
|
| 10 |
from langchain_core.runnables import RunnablePassthrough
|
| 11 |
from langchain_core.output_parsers import StrOutputParser
|
| 12 |
import tempfile
|
| 13 |
+
import time
|
| 14 |
+
import threading
|
| 15 |
+
|
| 16 |
+
# --- Cleanup Configuration ---
|
| 17 |
+
CHROMA_DB_PATH = os.path.join(tempfile.gettempdir(), "chroma_db")
|
| 18 |
+
CLEANUP_INTERVAL_HOURS = 3 # Cleanup every 3 hours
|
| 19 |
+
SESSION_TTL_HOURS = 3 # Sessions older than 3 hours will be deleted
|
| 20 |
+
|
| 21 |
+
# --- Cleanup Functions ---
|
| 22 |
+
def _remove_expired_sessions(base_path, ttl_seconds):
    """Run one sweep: delete sub-directories of *base_path* older than the TTL."""
    now = time.time()

    # EAFP: the directory may not exist yet, or may disappear between sweeps.
    # A listing failure must not propagate, otherwise the background thread
    # running the sweep loop would die and no further cleanup would happen.
    try:
        session_ids = os.listdir(base_path)
    except FileNotFoundError:
        return
    except OSError as e:
        print(f"Error listing session directories in {base_path}: {e}")
        return

    for session_id in session_ids:
        session_path = os.path.join(base_path, session_id)
        if not os.path.isdir(session_path):
            continue
        try:
            mod_time = os.path.getmtime(session_path)
            if (now - mod_time) > ttl_seconds:
                print(f"Cleaning up old session: {session_id}")
                shutil.rmtree(session_path)
        except Exception as e:
            # Best effort: one problematic directory must not stop the sweep.
            print(f"Error cleaning up session {session_id}: {e}")


def cleanup_old_sessions(base_path=None, ttl_hours=None, interval_hours=None,
                         stop_event=None):
    """Deletes session directories older than SESSION_TTL_HOURS.

    Runs a sweep loop intended to be the target of a daemon thread. Each
    sweep removes every sub-directory of *base_path* whose modification time
    lies more than *ttl_hours* in the past, then waits *interval_hours*
    before sweeping again.

    Args:
        base_path: Directory whose sub-directories are treated as sessions.
            Defaults to ``CHROMA_DB_PATH``.
        ttl_hours: Age in hours after which a session directory is deleted.
            Defaults to ``SESSION_TTL_HOURS``.
        interval_hours: Pause in hours between two sweeps. Defaults to
            ``CLEANUP_INTERVAL_HOURS``.
        stop_event: Optional object with a ``wait(timeout)`` method (for
            example a ``threading.Event``). When provided, the loop returns
            as soon as the event is set; when omitted, the function never
            returns.
    """
    # Defaults are resolved at call time so the module-level configuration
    # is only required when the caller does not override it.
    if base_path is None:
        base_path = CHROMA_DB_PATH
    if ttl_hours is None:
        ttl_hours = SESSION_TTL_HOURS
    if interval_hours is None:
        interval_hours = CLEANUP_INTERVAL_HOURS

    ttl_seconds = ttl_hours * 3600
    interval_seconds = interval_hours * 3600

    while True:
        _remove_expired_sessions(base_path, ttl_seconds)

        if stop_event is None:
            time.sleep(interval_seconds)
        elif stop_event.wait(interval_seconds):
            # The event was set while waiting (or before): shut down cleanly.
            return
|
| 44 |
+
|
| 45 |
+
# --- Initial Cleanup on Startup ---
# Wipe whatever a previous run left behind so every start begins with an
# empty database root.
print("Performing initial cleanup of old ChromaDB directories...")
try:
    shutil.rmtree(CHROMA_DB_PATH)
except FileNotFoundError:
    # Nothing left over from an earlier run.
    pass
except OSError as e:
    # Best effort: a file that cannot be removed must not prevent the
    # application from starting.
    print(f"Could not fully remove {CHROMA_DB_PATH}: {e}")
# exist_ok covers the case where the directory survived a partial removal.
os.makedirs(CHROMA_DB_PATH, exist_ok=True)
print("Cleanup complete. Starting background cleanup thread.")

# --- Start Background Cleanup Thread ---
# Daemon thread: it does not keep the process alive on shutdown.
cleanup_thread = threading.Thread(target=cleanup_old_sessions, daemon=True)
cleanup_thread.start()
|
| 55 |
+
|
| 56 |
|
| 57 |
# Set the Google API key from environment variables
|
| 58 |
if "GOOGLE_API_KEY" not in os.environ:
|
|
|
|
| 63 |
# Constants
|
| 64 |
LLM_MODEL = "gemini-1.5-flash"
|
| 65 |
EMBEDDING_MODEL = "models/embedding-001"
|
|
|
|
| 66 |
|
| 67 |
class SessionState:
|
| 68 |
def __init__(self):
|