QuentinL52 commited on
Commit
2b16a80
·
verified ·
1 Parent(s): f00b750

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +99 -131
main.py CHANGED
@@ -2,17 +2,37 @@ import tempfile
2
  import requests
3
  import os
4
  import logging
5
- from fastapi import FastAPI, UploadFile, File, HTTPException
 
6
  from fastapi.concurrency import run_in_threadpool
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from pydantic import BaseModel, Field
9
  from typing import List, Dict, Any, Optional
 
10
  os.environ['HOME'] = '/tmp'
11
- # Configuration du logging
 
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
15
- # Imports avec gestion d'erreurs robuste
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  try:
17
  from src.cv_parsing_agents import CvParserAgent, create_fallback_cv_data
18
  CV_PARSING_AVAILABLE = True
@@ -41,16 +61,14 @@ except Exception as e:
41
  SCORING_AVAILABLE = False
42
  ContextualScoringEngine = None
43
 
44
- # Application FastAPI
45
  app = FastAPI(
46
  title="AIrh Interview Assistant",
47
- description="API pour l'analyse de CV et la simulation d'entretiens d'embauche",
48
- version="1.3.0",
49
  docs_url="/docs",
50
  redoc_url="/redoc"
51
  )
52
 
53
- # Configuration CORS pour HF Spaces
54
  app.add_middleware(
55
  CORSMiddleware,
56
  allow_origins=["*"],
@@ -59,10 +77,6 @@ app.add_middleware(
59
  allow_headers=["*"],
60
  )
61
 
62
- # Configuration API Celery
63
- CELERY_API_URL = os.getenv("CELERY_API_URL", "https://celery-7as1.onrender.com")
64
-
65
- # Modèles Pydantic
66
  class InterviewRequest(BaseModel):
67
  user_id: str = Field(..., example="user_12345")
68
  job_offer_id: str = Field(..., example="job_offer_abcde")
@@ -71,81 +85,76 @@ class InterviewRequest(BaseModel):
71
  messages: List[Dict[str, Any]]
72
  conversation_history: List[Dict[str, Any]]
73
 
74
- class AnalysisRequest(BaseModel):
75
- conversation_history: List[Dict[str, Any]]
76
- job_description_text: str
77
- candidate_id: Optional[str] = None
78
-
79
- class TaskResponse(BaseModel):
80
- task_id: str
81
  status: str
82
- result: Any = None
83
- message: Optional[str] = None
84
 
85
  class HealthCheck(BaseModel):
86
  status: str = "ok"
87
- celery_api_status: Optional[str] = None
88
  services: Dict[str, bool] = Field(default_factory=dict)
89
  message: str = "API AIrh fonctionnelle"
90
 
91
- # Endpoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  @app.get("/", response_model=HealthCheck, tags=["Status"])
93
  async def health_check():
94
- """Health check de l'API avec test de connectivité Celery."""
95
-
96
- # Test connexion Celery
97
- celery_status = "unknown"
98
- try:
99
- response = requests.get(f"{CELERY_API_URL}/", timeout=5)
100
- celery_status = "connected" if response.status_code == 200 else "error"
101
- except Exception:
102
- celery_status = "disconnected"
103
-
104
  services = {
 
105
  "cv_parsing": CV_PARSING_AVAILABLE,
106
  "interview_simulation": INTERVIEW_AVAILABLE,
107
- "scoring_engine": SCORING_AVAILABLE,
108
- "celery_api": celery_status == "connected"
109
  }
110
-
111
- return HealthCheck(
112
- celery_api_status=celery_status,
113
- services=services
114
- )
115
 
116
  @app.post("/parse-cv/", tags=["CV Parsing"])
117
  async def parse_cv(file: UploadFile = File(...)):
118
  """Analyse un CV PDF et extrait les informations structurées."""
119
-
120
  if not CV_PARSING_AVAILABLE:
121
- # Fallback si le parsing n'est pas disponible
122
- return create_fallback_cv_data() if create_fallback_cv_data else {
123
- "error": "Service de parsing de CV temporairement indisponible",
124
- "candidat": {
125
- "informations_personnelles": {"nom": "Test User"},
126
- "compétences": {"hard_skills": [], "soft_skills": []}
127
- }
128
- }
129
 
130
  if file.content_type != "application/pdf":
131
  raise HTTPException(status_code=400, detail="Fichier PDF requis")
132
-
133
  tmp_path = None
134
  try:
135
- # Sauvegarder le fichier temporairement
136
  contents = await file.read()
137
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
138
  tmp.write(contents)
139
  tmp_path = tmp.name
140
 
141
- # Traiter le CV
142
  cv_agent = CvParserAgent(pdf_path=tmp_path)
143
  parsed_data = await run_in_threadpool(cv_agent.process)
144
 
145
  if not parsed_data and create_fallback_cv_data:
146
  parsed_data = create_fallback_cv_data(tmp_path)
147
 
148
- # Scoring si disponible
149
  if SCORING_AVAILABLE and ContextualScoringEngine and parsed_data:
150
  try:
151
  scoring_engine = ContextualScoringEngine(parsed_data)
@@ -165,20 +174,16 @@ async def parse_cv(file: UploadFile = File(...)):
165
 
166
  finally:
167
  if tmp_path and os.path.exists(tmp_path):
168
- try:
169
- os.remove(tmp_path)
170
- except Exception:
171
- pass
172
 
173
  @app.post("/simulate-interview/", tags=["Interview"])
174
- async def simulate_interview(request: InterviewRequest):
175
- """Gère une conversation d'entretien d'embauche."""
176
-
177
- if not INTERVIEW_AVAILABLE:
178
- raise HTTPException(
179
- status_code=503,
180
- detail="Service de simulation d'entretien indisponible"
181
- )
182
 
183
  try:
184
  processor = InterviewProcessor(
@@ -188,82 +193,45 @@ async def simulate_interview(request: InterviewRequest):
188
  )
189
 
190
  result = await run_in_threadpool(processor.run, messages=request.messages)
191
- return {"response": result["messages"][-1].content}
192
 
193
- except Exception as e:
194
- logger.error(f"Erreur simulation entretien: {e}")
195
- raise HTTPException(status_code=500, detail=str(e))
196
-
197
- @app.post("/trigger-analysis/", response_model=TaskResponse, status_code=202, tags=["Analysis"])
198
- async def trigger_analysis(request: AnalysisRequest):
199
- """Déclenche une analyse asynchrone via l'API Celery."""
200
-
201
- try:
202
- response = requests.post(
203
- f"{CELERY_API_URL}/trigger-analysis",
204
- json=request.dict(),
205
- headers={"Content-Type": "application/json"},
206
- timeout=30
207
- )
208
 
209
- if response.status_code == 202:
210
- data = response.json()
211
- return TaskResponse(
212
- task_id=data["task_id"],
213
- status=data["status"],
214
- message="Analyse démarrée"
215
- )
216
- else:
217
- raise HTTPException(status_code=503, detail="Service d'analyse indisponible")
218
 
219
- except requests.RequestException:
220
- raise HTTPException(status_code=503, detail="API Celery inaccessible")
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  except Exception as e:
 
222
  raise HTTPException(status_code=500, detail=str(e))
223
 
224
- @app.get("/analysis-status/{task_id}", response_model=TaskResponse, tags=["Analysis"])
225
- async def get_analysis_status(task_id: str):
226
- """Récupère le statut d'une analyse."""
 
 
 
227
 
228
  try:
229
- response = requests.get(f"{CELERY_API_URL}/task-status/{task_id}", timeout=10)
230
-
231
- if response.status_code == 200:
232
- data = response.json()
233
- return TaskResponse(
234
- task_id=task_id,
235
- status=data["status"],
236
- result=data.get("result"),
237
- message=data.get("progress", "Statut récupéré")
238
- )
239
- else:
240
- raise HTTPException(status_code=503, detail="Service d'analyse indisponible")
241
-
242
- except requests.RequestException:
243
- raise HTTPException(status_code=503, detail="API Celery inaccessible")
244
  except Exception as e:
245
- raise HTTPException(status_code=500, detail=str(e))
246
-
247
- # Endpoint de debug pour HF Spaces
248
- @app.get("/debug", tags=["Debug"])
249
- async def debug_info():
250
- """Informations de debug pour le déploiement."""
251
- return {
252
- "environment": {
253
- "HF_HOME": os.getenv("HF_HOME"),
254
- "CELERY_API_URL": CELERY_API_URL,
255
- "PYTHONPATH": os.getenv("PYTHONPATH")
256
- },
257
- "services": {
258
- "cv_parsing": CV_PARSING_AVAILABLE,
259
- "interview_simulation": INTERVIEW_AVAILABLE,
260
- "scoring_engine": SCORING_AVAILABLE
261
- },
262
- "cache_dirs": {
263
- "/tmp/cache": os.path.exists("/tmp/cache"),
264
- "/app/cache": os.path.exists("/app/cache")
265
- }
266
- }
267
 
268
  if __name__ == "__main__":
269
  import uvicorn
 
2
  import requests
3
  import os
4
  import logging
5
+ import json
6
+ from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
7
  from fastapi.concurrency import run_in_threadpool
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from pydantic import BaseModel, Field
10
  from typing import List, Dict, Any, Optional
11
+
12
  os.environ['HOME'] = '/tmp'
13
+ os.makedirs('/tmp/feedbacks', exist_ok=True)
14
+
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
+
19
+ try:
20
+ from src.deep_learning_analyzer import MultiModelInterviewAnalyzer
21
+ from src.rag_handler import get_rag_handler
22
+ from src.crew.crew_pool import run_interview_analysis
23
+
24
+ analyzer_model = MultiModelInterviewAnalyzer()
25
+ rag_handler_instance = get_rag_handler()
26
+ MODELS_AVAILABLE = True
27
+ logger.info("✅ Modèles d'analyse et RAG pré-chargés avec succès")
28
+ except Exception as e:
29
+ logger.error(f"❌ Erreur lors du pré-chargement des modèles: {e}")
30
+ MODELS_AVAILABLE = False
31
+ analyzer_model = None
32
+ rag_handler_instance = None
33
+ run_interview_analysis = None
34
+
35
+
36
  try:
37
  from src.cv_parsing_agents import CvParserAgent, create_fallback_cv_data
38
  CV_PARSING_AVAILABLE = True
 
61
  SCORING_AVAILABLE = False
62
  ContextualScoringEngine = None
63
 
 
64
  app = FastAPI(
65
  title="AIrh Interview Assistant",
66
+ description="API pour l'analyse de CV et la simulation d'entretiens d'embauche avec analyse asynchrone.",
67
+ version="2.0.0",
68
  docs_url="/docs",
69
  redoc_url="/redoc"
70
  )
71
 
 
72
  app.add_middleware(
73
  CORSMiddleware,
74
  allow_origins=["*"],
 
77
  allow_headers=["*"],
78
  )
79
 
 
 
 
 
80
  class InterviewRequest(BaseModel):
81
  user_id: str = Field(..., example="user_12345")
82
  job_offer_id: str = Field(..., example="job_offer_abcde")
 
85
  messages: List[Dict[str, Any]]
86
  conversation_history: List[Dict[str, Any]]
87
 
88
+ class Feedback(BaseModel):
 
 
 
 
 
 
89
  status: str
90
+ feedback_data: Optional[Dict[str, Any]] = None
 
91
 
92
  class HealthCheck(BaseModel):
93
  status: str = "ok"
 
94
  services: Dict[str, bool] = Field(default_factory=dict)
95
  message: str = "API AIrh fonctionnelle"
96
 
97
+ def analysis_in_background(user_id: str, conversation_history: list, job_description_text: str):
98
+ """
99
+ Fonction exécutée en arrière-plan pour analyser l'entretien
100
+ et sauvegarder le résultat.
101
+ """
102
+ logger.info(f"Démarrage de l'analyse en arrière-plan pour l'utilisateur: {user_id}")
103
+ try:
104
+ if not MODELS_AVAILABLE:
105
+ raise RuntimeError("Les modèles d'analyse ne sont pas disponibles.")
106
+
107
+ report = run_interview_analysis(
108
+ conversation_history,
109
+ job_description_text,
110
+ analyzer_model,
111
+ rag_handler_instance
112
+ )
113
+
114
+ feedback_path = f"/tmp/feedbacks/{user_id}.json"
115
+ with open(feedback_path, "w", encoding="utf-8") as f:
116
+ json.dump({"status": "completed", "feedback_data": report}, f, ensure_ascii=False, indent=4)
117
+
118
+ logger.info(f"✅ Analyse terminée et sauvegardée pour l'utilisateur: {user_id}")
119
+ except Exception as e:
120
+ logger.error(f"❌ Erreur durant l'analyse en arrière-plan pour {user_id}: {e}")
121
+ feedback_path = f"/tmp/feedbacks/{user_id}.json"
122
+ with open(feedback_path, "w", encoding="utf-8") as f:
123
+ json.dump({"status": "error", "feedback_data": str(e)}, f, ensure_ascii=False, indent=4)
124
+
125
  @app.get("/", response_model=HealthCheck, tags=["Status"])
126
  async def health_check():
127
+ """Health check de l'API."""
 
 
 
 
 
 
 
 
 
128
  services = {
129
+ "models_loaded": MODELS_AVAILABLE,
130
  "cv_parsing": CV_PARSING_AVAILABLE,
131
  "interview_simulation": INTERVIEW_AVAILABLE,
132
+ "scoring_engine": SCORING_AVAILABLE
 
133
  }
134
+ return HealthCheck(services=services)
 
 
 
 
135
 
136
  @app.post("/parse-cv/", tags=["CV Parsing"])
137
  async def parse_cv(file: UploadFile = File(...)):
138
  """Analyse un CV PDF et extrait les informations structurées."""
 
139
  if not CV_PARSING_AVAILABLE:
140
+ return create_fallback_cv_data() if create_fallback_cv_data else {"error": "Service de parsing indisponible"}
 
 
 
 
 
 
 
141
 
142
  if file.content_type != "application/pdf":
143
  raise HTTPException(status_code=400, detail="Fichier PDF requis")
144
+
145
  tmp_path = None
146
  try:
 
147
  contents = await file.read()
148
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
149
  tmp.write(contents)
150
  tmp_path = tmp.name
151
 
 
152
  cv_agent = CvParserAgent(pdf_path=tmp_path)
153
  parsed_data = await run_in_threadpool(cv_agent.process)
154
 
155
  if not parsed_data and create_fallback_cv_data:
156
  parsed_data = create_fallback_cv_data(tmp_path)
157
 
 
158
  if SCORING_AVAILABLE and ContextualScoringEngine and parsed_data:
159
  try:
160
  scoring_engine = ContextualScoringEngine(parsed_data)
 
174
 
175
  finally:
176
  if tmp_path and os.path.exists(tmp_path):
177
+ os.remove(tmp_path)
 
 
 
178
 
179
  @app.post("/simulate-interview/", tags=["Interview"])
180
+ async def simulate_interview(request: InterviewRequest, background_tasks: BackgroundTasks):
181
+ """
182
+ Gère une conversation d'entretien. Si la conversation se termine,
183
+ lance une analyse en arrière-plan.
184
+ """
185
+ if not INTERVIEW_AVAILABLE or not MODELS_AVAILABLE:
186
+ raise HTTPException(status_code=503, detail="Service de simulation ou modèles indisponibles")
 
187
 
188
  try:
189
  processor = InterviewProcessor(
 
193
  )
194
 
195
  result = await run_in_threadpool(processor.run, messages=request.messages)
 
196
 
197
+ response_content = result["messages"][-1].content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
+ # Déclencher l'analyse si l'entretien est terminé
200
+ if "nous allons maintenant passer a l'analyse" in response_content.lower():
201
+ logger.info(f"Fin d'entretien détectée pour {request.user_id}. Lancement de l'analyse en arrière-plan.")
 
 
 
 
 
 
202
 
203
+ # Sauvegarder un statut initial
204
+ feedback_path = f"/tmp/feedbacks/{request.user_id}.json"
205
+ with open(feedback_path, "w", encoding="utf-8") as f:
206
+ json.dump({"status": "processing"}, f, ensure_ascii=False, indent=4)
207
+
208
+ job_description = request.job_offer.get('description', '')
209
+ background_tasks.add_task(
210
+ analysis_in_background,
211
+ request.user_id,
212
+ request.conversation_history + request.messages,
213
+ job_description
214
+ )
215
+
216
+ return {"response": response_content}
217
+
218
  except Exception as e:
219
+ logger.error(f"Erreur simulation entretien: {e}")
220
  raise HTTPException(status_code=500, detail=str(e))
221
 
222
+ @app.get("/get-feedback/{user_id}", response_model=Feedback, tags=["Analysis"])
223
+ async def get_feedback(user_id: str):
224
+ """Récupère le résultat de l'analyse post-entretien."""
225
+ feedback_path = f"/tmp/feedbacks/{user_id}.json"
226
+ if not os.path.exists(feedback_path):
227
+ raise HTTPException(status_code=404, detail="Feedback non trouvé ou non encore traité.")
228
 
229
  try:
230
+ with open(feedback_path, "r", encoding="utf-8") as f:
231
+ data = json.load(f)
232
+ return Feedback(**data)
 
 
 
 
 
 
 
 
 
 
 
 
233
  except Exception as e:
234
+ raise HTTPException(status_code=500, detail=f"Erreur à la lecture du feedback: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  if __name__ == "__main__":
237
  import uvicorn