SamiKLN committed on
Commit
d59b7f5
·
verified ·
1 Parent(s): db0f953

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +38 -73
main.py CHANGED
@@ -43,7 +43,7 @@ client = InferenceClient(token=HF_TOKEN)
43
  MODELS = {
44
  "summary": "facebook/bart-large-cnn",
45
  "caption": "Salesforce/blip-image-captioning-large",
46
- "qa": "google/flan-t5-base" # Remplacé par un modèle plus petit compatible
47
  }
48
 
49
  # Modèles Pydantic
@@ -107,13 +107,6 @@ def extract_text_from_excel(file_path: str) -> str:
107
  logger.error(f"Excel extraction error: {e}")
108
  raise HTTPException(400, "Erreur d'extraction Excel")
109
 
110
- # Fonction helper pour trouver un fichier par ID sans récursion
111
- def find_file_by_id(file_id: str):
112
- for file_path in UPLOAD_FOLDER.iterdir():
113
- if file_path.name.startswith(file_id):
114
- return file_path
115
- return None
116
-
117
  async def process_uploaded_file(file: UploadFile) -> FileInfo:
118
  file_ext = Path(file.filename).suffix.lower()
119
  file_id = str(uuid.uuid4())
@@ -169,40 +162,31 @@ async def upload_files(files: List[UploadFile] = File(...)):
169
  @app.post("/api/summarize")
170
  async def summarize_document(request: SummaryRequest):
171
  try:
172
- file_path = find_file_by_id(request.file_id)
173
- if not file_path:
174
- raise HTTPException(404, "Fichier non trouvé")
175
-
176
  text = ""
177
 
178
  if file_path.suffix == ".pdf":
179
  text = extract_text_from_pdf(str(file_path))
180
- elif file_path.suffix == ".docx":
181
- text = extract_text_from_docx(str(file_path))
182
- elif file_path.suffix == ".pptx":
183
- text = extract_text_from_pptx(str(file_path))
184
- elif file_path.suffix in (".xlsx", ".xls"):
185
- text = extract_text_from_excel(str(file_path))
186
  else:
187
- try:
188
- with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
189
- text = f.read()
190
- except Exception as e:
191
- logger.error(f"Error reading file: {e}")
192
- raise HTTPException(400, "Erreur de lecture du fichier")
193
 
194
- # Limiter la taille pour éviter des erreurs avec l'API
195
- text_input = text[:4000] + ("..." if len(text) > 4000 else "")
 
 
 
 
 
 
196
 
197
  summary = client.summarization(
198
- text=text_input,
199
  model=MODELS["summary"],
200
  parameters={"max_length": request.max_length}
201
  )
202
 
203
  return {"summary": summary}
204
- except HTTPException as e:
205
- raise e
206
  except Exception as e:
207
  logger.error(f"Summarization error: {e}")
208
  raise HTTPException(500, f"Erreur de résumé: {str(e)}")
@@ -210,12 +194,7 @@ async def summarize_document(request: SummaryRequest):
210
  @app.post("/api/caption")
211
  async def caption_image(request: CaptionRequest):
212
  try:
213
- file_path = find_file_by_id(request.file_id)
214
- if not file_path:
215
- raise HTTPException(404, "Fichier non trouvé")
216
-
217
- if not file_path.suffix.lower() in [".jpg", ".jpeg", ".png", ".gif"]:
218
- raise HTTPException(400, "Le fichier doit être une image (jpg, png, gif)")
219
 
220
  with open(file_path, "rb") as image_file:
221
  image_data = image_file.read()
@@ -226,8 +205,6 @@ async def caption_image(request: CaptionRequest):
226
  )
227
 
228
  return {"caption": caption}
229
- except HTTPException as e:
230
- raise e
231
  except Exception as e:
232
  logger.error(f"Captioning error: {e}")
233
  raise HTTPException(500, f"Erreur de description: {str(e)}")
@@ -237,42 +214,34 @@ async def answer_question(request: QARequest):
237
  try:
238
  context = ""
239
  if request.file_id:
240
- file_path = find_file_by_id(request.file_id)
241
- if file_path:
242
- if file_path.suffix.lower() in (".jpg", ".jpeg", ".png", ".gif"):
243
- with open(file_path, "rb") as image_file:
244
- image_data = image_file.read()
245
- context = client.image_to_text(image=image_data, model=MODELS["caption"])
246
- else:
247
- if file_path.suffix == ".pdf":
248
- context = extract_text_from_pdf(str(file_path))
249
- elif file_path.suffix == ".docx":
250
- context = extract_text_from_docx(str(file_path))
251
- elif file_path.suffix == ".pptx":
252
- context = extract_text_from_pptx(str(file_path))
253
- elif file_path.suffix in (".xlsx", ".xls"):
254
- context = extract_text_from_excel(str(file_path))
255
- else:
256
- try:
257
- with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
258
- context = f.read()
259
- except Exception as e:
260
- logger.error(f"Error reading file: {e}")
261
- context = ""
262
  else:
263
- logger.warning(f"File not found: {request.file_id}")
 
 
 
 
264
 
265
- # Adapter le prompt au format attendu par le modèle T5
266
- inputs = f"answer question: {request.question} context: {context[:1500]}"
 
 
 
 
 
267
 
268
- response = client.text_generation(
269
- prompt=inputs,
270
  model=MODELS["qa"],
271
- max_new_tokens=250,
272
- temperature=0.7
273
  )
274
 
275
- return {"answer": response}
276
  except Exception as e:
277
  logger.error(f"QA error: {e}")
278
  raise HTTPException(500, f"Erreur de réponse: {str(e)}")
@@ -280,15 +249,11 @@ async def answer_question(request: QARequest):
280
  @app.get("/api/file/{file_id}")
281
  async def get_file(file_id: str):
282
  try:
283
- file_path = find_file_by_id(file_id)
284
- if not file_path:
285
- raise HTTPException(404, "Fichier non trouvé")
286
  return FileResponse(file_path)
287
- except HTTPException as e:
288
- raise e
289
  except Exception as e:
290
  logger.error(f"File retrieval error: {e}")
291
- raise HTTPException(500, f"Erreur lors de la récupération du fichier: {str(e)}")
292
 
293
  # Gestion des erreurs
294
  @app.exception_handler(HTTPException)
 
43
  MODELS = {
44
  "summary": "facebook/bart-large-cnn",
45
  "caption": "Salesforce/blip-image-captioning-large",
46
+ "qa": "deepseek-ai/DeepSeek-V2-Chat"
47
  }
48
 
49
  # Modèles Pydantic
 
107
  logger.error(f"Excel extraction error: {e}")
108
  raise HTTPException(400, "Erreur d'extraction Excel")
109
 
 
 
 
 
 
 
 
110
  async def process_uploaded_file(file: UploadFile) -> FileInfo:
111
  file_ext = Path(file.filename).suffix.lower()
112
  file_id = str(uuid.uuid4())
 
162
  @app.post("/api/summarize")
163
  async def summarize_document(request: SummaryRequest):
164
  try:
165
+ file_path = next(f for f in UPLOAD_FOLDER.glob(f"{request.file_id}*"))
 
 
 
166
  text = ""
167
 
168
  if file_path.suffix == ".pdf":
169
  text = extract_text_from_pdf(str(file_path))
 
 
 
 
 
 
170
  else:
171
+ with open(file_path, "r", encoding="utf-8") as f:
172
+ text = f.read()
 
 
 
 
173
 
174
+ prompt = f"""
175
+ Résumez ce document de manière concise en français.
176
+ Concentrez-vous sur les points principaux.
177
+ Le résumé doit faire environ {request.max_length} mots.
178
+
179
+ Document:
180
+ {text[:5000]}... [truncated]
181
+ """
182
 
183
  summary = client.summarization(
184
+ text=text,
185
  model=MODELS["summary"],
186
  parameters={"max_length": request.max_length}
187
  )
188
 
189
  return {"summary": summary}
 
 
190
  except Exception as e:
191
  logger.error(f"Summarization error: {e}")
192
  raise HTTPException(500, f"Erreur de résumé: {str(e)}")
 
194
  @app.post("/api/caption")
195
  async def caption_image(request: CaptionRequest):
196
  try:
197
+ file_path = next(f for f in UPLOAD_FOLDER.glob(f"{request.file_id}*"))
 
 
 
 
 
198
 
199
  with open(file_path, "rb") as image_file:
200
  image_data = image_file.read()
 
205
  )
206
 
207
  return {"caption": caption}
 
 
208
  except Exception as e:
209
  logger.error(f"Captioning error: {e}")
210
  raise HTTPException(500, f"Erreur de description: {str(e)}")
 
214
  try:
215
  context = ""
216
  if request.file_id:
217
+ file_path = next(f for f in UPLOAD_FOLDER.glob(f"{request.file_id}*"))
218
+
219
+ if file_path.suffix in (".jpg", ".jpeg", ".png"):
220
+ with open(file_path, "rb") as image_file:
221
+ image_data = image_file.read()
222
+ context = client.image_to_text(image=image_data, model=MODELS["caption"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  else:
224
+ if file_path.suffix == ".pdf":
225
+ context = extract_text_from_pdf(str(file_path))
226
+ else:
227
+ with open(file_path, "r", encoding="utf-8") as f:
228
+ context = f.read()
229
 
230
+ prompt = f"""
231
+ Vous êtes un assistant IA qui répond à des questions en français.
232
+ Répondez de manière précise et concise.
233
+ Contexte: {context[:3000]}
234
+ Question: {request.question}
235
+ Réponse:
236
+ """
237
 
238
+ response = client.chat_completion(
 
239
  model=MODELS["qa"],
240
+ messages=[{"role": "user", "content": prompt}],
241
+ max_tokens=500
242
  )
243
 
244
+ return {"answer": response.choices[0].message.content}
245
  except Exception as e:
246
  logger.error(f"QA error: {e}")
247
  raise HTTPException(500, f"Erreur de réponse: {str(e)}")
 
249
  @app.get("/api/file/{file_id}")
250
  async def get_file(file_id: str):
251
  try:
252
+ file_path = next(f for f in UPLOAD_FOLDER.glob(f"{file_id}*"))
 
 
253
  return FileResponse(file_path)
 
 
254
  except Exception as e:
255
  logger.error(f"File retrieval error: {e}")
256
+ raise HTTPException(404, "Fichier non trouvé")
257
 
258
  # Gestion des erreurs
259
  @app.exception_handler(HTTPException)