amitbhatt6075 committed on
Commit
c6cf010
·
1 Parent(s): 7959964

fix: Final definitive fix for initialization race condition

Browse files
Files changed (1) hide show
  1. api/main.py +52 -49
api/main.py CHANGED
@@ -30,7 +30,7 @@ from core.anomaly_detector import find_anomalies
30
  from core.matcher import load_embedding_model, rank_documents_by_similarity
31
  from core.utils import get_supabase_client, extract_colors_from_url
32
  from core.document_parser import parse_pdf_from_url
33
- from core.creative_chat import CreativeDirector
34
 
35
  try:
36
  from core.rag.store import VectorStore
@@ -41,18 +41,13 @@ except ImportError:
41
 
42
  def cached_response(func): return func
43
 
 
44
  ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
45
  MODELS_DIR = os.path.join(ROOT_DIR, 'models')
46
-
47
- # === FIX #2: Dynamic Model Downloading Logic ===
48
- # This replaces your old static LLAMA_MODEL_PATH
49
  MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
50
  MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
51
- # Hugging Face Spaces provides a writable directory at /data or we can fall back to /tmp
52
- MODEL_SAVE_DIRECTORY = os.path.join(os.environ.get("WRITABLE_DIR", "/tmp"), "llm_model")
53
- # This will be the final path to our model file once it's downloaded
54
  LLAMA_MODEL_PATH = os.path.join(MODEL_SAVE_DIRECTORY, MODEL_FILENAME)
55
- # ===============================================
56
 
57
  EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
58
  EMBEDDING_MODEL_PATH = os.path.join(ROOT_DIR, 'embedding_model')
@@ -63,7 +58,7 @@ FINAL_EMBEDDING_PATH = EMBEDDING_MODEL_PATH if os.path.exists(EMBEDDING_MODEL_PA
63
  _llm_instance: Optional[Llama] = None
64
  _vector_store: Optional[Any] = None
65
  _ai_strategist: Optional[AIStrategist] = None
66
- _creative_director: Optional[CreativeDirector] = None
67
  _support_agent: Optional[SupportAgent] = None
68
  _budget_predictor = None
69
  _influencer_matcher = None
@@ -513,58 +508,60 @@ def startup_event():
513
  _earnings_optimizer, _earnings_encoder, _likes_predictor, _comments_predictor, \
514
  _revenue_forecaster, _performance_scorer
515
 
516
- # === MODEL DOWNLOAD AND LOAD LOGIC ===
517
  print("--- πŸš€ AI Service Starting Up... ---")
518
  try:
 
519
  os.makedirs(MODEL_SAVE_DIRECTORY, exist_ok=True)
 
 
520
  if not os.path.exists(LLAMA_MODEL_PATH):
521
  print(f" - LLM model not found locally. Downloading '{MODEL_FILENAME}'...")
 
522
  hf_hub_download(
523
  repo_id=MODEL_REPO,
524
  filename=MODEL_FILENAME,
525
  local_dir=MODEL_SAVE_DIRECTORY,
526
- local_dir_use_symlinks=False
527
  )
528
  print(" - βœ… Model downloaded successfully.")
529
  else:
530
- print(f" - LLM model found at {LLAMA_MODEL_PATH}. Skipping download.")
531
 
 
532
  print(" - Loading Llama LLM into memory...")
533
  _llm_instance = Llama(model_path=LLAMA_MODEL_PATH, n_gpu_layers=0, n_ctx=2048, verbose=False, use_mmap=False)
534
- print(" - βœ… LLM Loaded.")
535
 
536
  except Exception as e:
537
- print(f" - ❌ FATAL ERROR: Could not download or load LLM model: {e}")
538
- traceback.print_exc()
539
- # If LLM fails to load, we can't continue.
540
- # Set instance to None and the rest of the app will know.
541
- _llm_instance = None
542
- return # Stop the startup process here.
543
-
544
- # === INITIALIZE AI COMPONENTS (NOW THAT LLM IS LOADED) ===
545
- # This logic now runs ONLY IF the LLM loaded successfully.
546
- try:
547
- print(" - Initializing Creative Director...")
548
- _creative_director = CreativeDirector(llm_instance=_llm_instance)
549
- print(" - βœ… Creative Director is online.")
550
-
551
- if VectorStore:
552
- print(" - Initializing Vector Store...")
553
- _vector_store = VectorStore()
554
- print(" - βœ… RAG Engine Ready.")
555
-
556
- print(" - Initializing AI Strategist...")
557
- _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
558
- print(" - βœ… AI Strategist ready.")
559
-
560
- print(" - Initializing Support Agent...")
561
- _support_agent = SupportAgent(llm_instance=_llm_instance, embedding_path=EMBEDDING_MODEL_PATH, db_path=DB_PATH)
562
- print(" - βœ… Support Agent ready.")
563
- except Exception as e:
564
- print(f" - ❌ FAILED to initialize core AI components: {e}")
565
  traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
567
- # === LOAD SKLEARN MODELS (This part is independent of the LLM) ===
568
  print(" - Loading ML models from joblib files...")
569
  model_paths = {
570
  'budget': ('_budget_predictor', 'budget_predictor_v1.joblib'),
@@ -585,16 +582,20 @@ def startup_event():
585
  print(f" - Loaded {name} model.")
586
  except FileNotFoundError:
587
  globals()[var] = None
588
- print(f" - ⚠️ WARNING: Model '{name}' not found at {path}. Endpoint disabled.")
589
 
590
  print(" - Initializing Text Embedding Model...")
591
  load_embedding_model(EMBEDDING_MODEL_PATH)
592
 
593
- print("\n--- βœ… AI Service is fully operational! ---")
594
-
595
  @app.get("/", summary="Health Check")
596
  def read_root():
597
- return {"status": "AI Service is running"}
 
 
 
 
598
 
599
  def _cleanup_llm_response(data: dict) -> dict:
600
  """A robust helper to clean common messy JSON outputs from smaller LLMs."""
@@ -1941,7 +1942,7 @@ def generate_weekly_plan_route(request: WeeklyPlanRequest): # <--- async hata d
1941
  @app.post("/chat/creative", response_model=Dict[str, str], summary="Brainstorming chat with AI Creative Director")
1942
  def creative_chat_endpoint(request: CreativeChatRequest):
1943
  if not _creative_director:
1944
- raise HTTPException(status_code=503, detail="AI Creative Director is not available due to a startup error.")
1945
  try:
1946
  history_list = [m.model_dump() for m in request.history]
1947
  response_text = _creative_director.chat(
@@ -1952,13 +1953,14 @@ def creative_chat_endpoint(request: CreativeChatRequest):
1952
  return {"reply": response_text}
1953
  except Exception as e:
1954
  print(f"🚨 Creative Chat Error: {e}")
 
1955
  raise HTTPException(status_code=500, detail="An error occurred with the AI Director.")
1956
 
1957
 
1958
  @app.post("/generate/final-from-chat", response_model=FinalScriptResponse, summary="Generates final structured script from chat history")
1959
  def finalize_script_endpoint(request: FinalizeScriptRequest):
1960
  if not _creative_director:
1961
- raise HTTPException(status_code=503, detail="AI Creative Director is not available due to a startup error.")
1962
  try:
1963
  history_list = [m.model_dump() for m in request.history]
1964
  return _creative_director.generate_final_plan(
@@ -1967,4 +1969,5 @@ def finalize_script_endpoint(request: FinalizeScriptRequest):
1967
  )
1968
  except Exception as e:
1969
  print(f"🚨 Finalize Script Error: {e}")
1970
- raise HTTPException(status_code=500, detail="Failed to generate the final plan.")
 
 
30
  from core.matcher import load_embedding_model, rank_documents_by_similarity
31
  from core.utils import get_supabase_client, extract_colors_from_url
32
  from core.document_parser import parse_pdf_from_url
33
+ from core.creative_chat import CreativeDirector
34
 
35
  try:
36
  from core.rag.store import VectorStore
 
41
 
42
  def cached_response(func): return func
43
 
44
+
45
  ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
46
  MODELS_DIR = os.path.join(ROOT_DIR, 'models')
 
 
 
47
  MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
48
  MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
49
+ MODEL_SAVE_DIRECTORY = os.path.join(os.environ.get("WRITABLE_DIR", "/data"), "llm_model")
 
 
50
  LLAMA_MODEL_PATH = os.path.join(MODEL_SAVE_DIRECTORY, MODEL_FILENAME)
 
51
 
52
  EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
53
  EMBEDDING_MODEL_PATH = os.path.join(ROOT_DIR, 'embedding_model')
 
58
  _llm_instance: Optional[Llama] = None
59
  _vector_store: Optional[Any] = None
60
  _ai_strategist: Optional[AIStrategist] = None
61
+ _creative_director: CreativeDirector | None = None
62
  _support_agent: Optional[SupportAgent] = None
63
  _budget_predictor = None
64
  _influencer_matcher = None
 
508
  _earnings_optimizer, _earnings_encoder, _likes_predictor, _comments_predictor, \
509
  _revenue_forecaster, _performance_scorer
510
 
511
+ # --- STEP 1: DOWNLOAD AND LOAD THE LLM MODEL ---
512
  print("--- πŸš€ AI Service Starting Up... ---")
513
  try:
514
+ # Create the directory where the model will be saved if it doesn't exist
515
  os.makedirs(MODEL_SAVE_DIRECTORY, exist_ok=True)
516
+
517
+ # Check if the model file already exists before trying to download it
518
  if not os.path.exists(LLAMA_MODEL_PATH):
519
  print(f" - LLM model not found locally. Downloading '{MODEL_FILENAME}'...")
520
+ # This function downloads the file from the Hub to the specified directory
521
  hf_hub_download(
522
  repo_id=MODEL_REPO,
523
  filename=MODEL_FILENAME,
524
  local_dir=MODEL_SAVE_DIRECTORY,
525
+ local_dir_use_symlinks=False # Important for container environments
526
  )
527
  print(" - βœ… Model downloaded successfully.")
528
  else:
529
+ print(f" - LLM model found locally at {LLAMA_MODEL_PATH}. Skipping download.")
530
 
531
+ # Now that the file is guaranteed to be there, load it into memory
532
  print(" - Loading Llama LLM into memory...")
533
  _llm_instance = Llama(model_path=LLAMA_MODEL_PATH, n_gpu_layers=0, n_ctx=2048, verbose=False, use_mmap=False)
534
+ print(" - βœ… LLM Loaded successfully.")
535
 
536
  except Exception as e:
537
+ # If anything in this block fails, the LLM is not usable.
538
+ print(f" - ❌ FATAL ERROR: Could not download or load the LLM model. LLM-dependent features will be disabled.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  traceback.print_exc()
540
+ _llm_instance = None # Ensure the global variable is None
541
+
542
+ # --- STEP 2: INITIALIZE ALL AI COMPONENTS THAT NEED THE LLM ---
543
+ # This part only runs if the LLM was loaded successfully (_llm_instance is not None)
544
+ if _llm_instance:
545
+ try:
546
+ print(" - Initializing AI components...")
547
+ _creative_director = CreativeDirector(llm_instance=_llm_instance)
548
+
549
+ if VectorStore:
550
+ _vector_store = VectorStore()
551
+ print(" - RAG Engine Ready.")
552
+
553
+ _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
554
+ _support_agent = SupportAgent(llm_instance=_llm_instance, embedding_path=EMBEDDING_MODEL_PATH, db_path=DB_PATH)
555
+
556
+ print(" - βœ… Core AI components (Director, Strategist, Agent) are online.")
557
+
558
+ except Exception as e:
559
+ print(f" - ❌ FAILED to initialize core AI components: {e}")
560
+ traceback.print_exc()
561
+ else:
562
+ print(" - ⚠️ SKIPPING initialization of LLM-dependent components because LLM failed to load.")
563
 
564
+ # --- STEP 3: LOAD ALL OTHER MODELS (These don't depend on the LLM) ---
565
  print(" - Loading ML models from joblib files...")
566
  model_paths = {
567
  'budget': ('_budget_predictor', 'budget_predictor_v1.joblib'),
 
582
  print(f" - Loaded {name} model.")
583
  except FileNotFoundError:
584
  globals()[var] = None
585
+ print(f" - ⚠️ WARNING: Model '{name}' not found at {path}. Endpoint will be disabled.")
586
 
587
  print(" - Initializing Text Embedding Model...")
588
  load_embedding_model(EMBEDDING_MODEL_PATH)
589
 
590
+ print("\n--- βœ… AI Service startup sequence finished! ---")
591
+
592
  @app.get("/", summary="Health Check")
593
  def read_root():
594
+ # We add a check here to see if the LLM loaded successfully during startup.
595
+ # This helps with debugging on the live server.
596
+ if _llm_instance is None:
597
+ return {"status": "AI Service is running, but the Core LLM FAILED to load. Check logs."}
598
+ return {"status": "AI Service is running and all models are loaded."}
599
 
600
  def _cleanup_llm_response(data: dict) -> dict:
601
  """A robust helper to clean common messy JSON outputs from smaller LLMs."""
 
1942
  @app.post("/chat/creative", response_model=Dict[str, str], summary="Brainstorming chat with AI Creative Director")
1943
  def creative_chat_endpoint(request: CreativeChatRequest):
1944
  if not _creative_director:
1945
+ raise HTTPException(status_code=503, detail="The AI Creative Director is not available due to a startup error.")
1946
  try:
1947
  history_list = [m.model_dump() for m in request.history]
1948
  response_text = _creative_director.chat(
 
1953
  return {"reply": response_text}
1954
  except Exception as e:
1955
  print(f"🚨 Creative Chat Error: {e}")
1956
+ traceback.print_exc()
1957
  raise HTTPException(status_code=500, detail="An error occurred with the AI Director.")
1958
 
1959
 
1960
  @app.post("/generate/final-from-chat", response_model=FinalScriptResponse, summary="Generates final structured script from chat history")
1961
  def finalize_script_endpoint(request: FinalizeScriptRequest):
1962
  if not _creative_director:
1963
+ raise HTTPException(status_code=503, detail="The AI Creative Director is not available due to a startup error.")
1964
  try:
1965
  history_list = [m.model_dump() for m in request.history]
1966
  return _creative_director.generate_final_plan(
 
1969
  )
1970
  except Exception as e:
1971
  print(f"🚨 Finalize Script Error: {e}")
1972
+ traceback.print_exc()
1973
+ raise HTTPException(status_code=500, detail="Failed to generate the final plan.")