Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -9,6 +9,7 @@ from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
|
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
from pydantic import BaseModel
|
| 11 |
from huggingface_hub import InferenceClient
|
|
|
|
| 12 |
import fitz # PyMuPDF
|
| 13 |
from PIL import Image
|
| 14 |
import io
|
|
@@ -43,9 +44,12 @@ client = InferenceClient(token=HF_TOKEN)
|
|
| 43 |
MODELS = {
|
| 44 |
"summary": "facebook/bart-large-cnn",
|
| 45 |
"caption": "Salesforce/blip-image-captioning-large",
|
| 46 |
-
"qa": "
|
| 47 |
}
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
# Modèles Pydantic
|
| 50 |
class FileInfo(BaseModel):
|
| 51 |
file_id: str
|
|
@@ -171,15 +175,6 @@ async def summarize_document(request: SummaryRequest):
|
|
| 171 |
with open(file_path, "r", encoding="utf-8") as f:
|
| 172 |
text = f.read()
|
| 173 |
|
| 174 |
-
prompt = f"""
|
| 175 |
-
Résumez ce document de manière concise en français.
|
| 176 |
-
Concentrez-vous sur les points principaux.
|
| 177 |
-
Le résumé doit faire environ {request.max_length} mots.
|
| 178 |
-
|
| 179 |
-
Document:
|
| 180 |
-
{text[:5000]}... [truncated]
|
| 181 |
-
"""
|
| 182 |
-
|
| 183 |
summary = client.summarization(
|
| 184 |
text=text,
|
| 185 |
model=MODELS["summary"],
|
|
@@ -226,25 +221,17 @@ async def answer_question(request: QARequest):
|
|
| 226 |
else:
|
| 227 |
with open(file_path, "r", encoding="utf-8") as f:
|
| 228 |
context = f.read()
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
# Utilisation de text_generation au lieu de chat_completion
|
| 239 |
-
response = client.text_generation(
|
| 240 |
-
prompt=prompt,
|
| 241 |
-
model=MODELS["qa"],
|
| 242 |
-
max_new_tokens=500,
|
| 243 |
-
temperature=0.7,
|
| 244 |
-
top_p=0.9
|
| 245 |
-
)
|
| 246 |
-
|
| 247 |
-
return {"answer": response}
|
| 248 |
except Exception as e:
|
| 249 |
logger.error(f"QA error: {e}")
|
| 250 |
raise HTTPException(500, f"Erreur de réponse: {str(e)}")
|
|
|
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
from pydantic import BaseModel
|
| 11 |
from huggingface_hub import InferenceClient
|
| 12 |
+
from transformers import pipeline # Pour le pipeline QA
|
| 13 |
import fitz # PyMuPDF
|
| 14 |
from PIL import Image
|
| 15 |
import io
|
|
|
|
| 44 |
MODELS = {
|
| 45 |
"summary": "facebook/bart-large-cnn",
|
| 46 |
"caption": "Salesforce/blip-image-captioning-large",
|
| 47 |
+
"qa": "distilbert-base-cased-distilled-squad" # Nouveau modèle QA plus léger
|
| 48 |
}
|
| 49 |
|
| 50 |
+
# Pipeline QA pour distilbert-base-cased-distilled-squad
|
| 51 |
+
qa_pipeline = pipeline("question-answering", model=MODELS["qa"], tokenizer=MODELS["qa"])
|
| 52 |
+
|
| 53 |
# Modèles Pydantic
|
| 54 |
class FileInfo(BaseModel):
|
| 55 |
file_id: str
|
|
|
|
| 175 |
with open(file_path, "r", encoding="utf-8") as f:
|
| 176 |
text = f.read()
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
summary = client.summarization(
|
| 179 |
text=text,
|
| 180 |
model=MODELS["summary"],
|
|
|
|
| 221 |
else:
|
| 222 |
with open(file_path, "r", encoding="utf-8") as f:
|
| 223 |
context = f.read()
|
| 224 |
+
|
| 225 |
+
# Utiliser le pipeline QA pour obtenir la réponse
|
| 226 |
+
result = qa_pipeline(question=request.question, context=context)
|
| 227 |
|
| 228 |
+
return {
|
| 229 |
+
"answer": result["answer"],
|
| 230 |
+
"confidence": result["score"]
|
| 231 |
+
}
|
| 232 |
+
except StopIteration:
|
| 233 |
+
logger.error("File not found")
|
| 234 |
+
raise HTTPException(404, "Fichier non trouvé")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
except Exception as e:
|
| 236 |
logger.error(f"QA error: {e}")
|
| 237 |
raise HTTPException(500, f"Erreur de réponse: {str(e)}")
|