Bambo TRAORE [SNT DRPS/PEX/SDN] committed on
Commit ·
096b6ed
1
Parent(s): f858a0a
add ocr
Browse files
- app/src/base/__pycache__/base_ia_service.cpython-313.pyc +0 -0
- app/src/router/__pycache__/chat_router.cpython-313.pyc +0 -0
- app/src/router/chat_router.py +5 -12
- app/src/services/adminstrations/__pycache__/administration_service.cpython-313.pyc +0 -0
- app/src/services/documents/__pycache__/llm_service.cpython-313.pyc +0 -0
- app/src/services/documents/__pycache__/ocr_service.cpython-313.pyc +0 -0
- app/src/services/documents/llm_service.py +17 -8
- app/src/services/documents/ocr_service.py +18 -8
- app/src/services/droitsociaux/__pycache__/droitsociaux_service.cpython-313.pyc +0 -0
- app/src/services/education/__pycache__/education_service.cpython-313.pyc +0 -0
- app/src/services/emploi/__pycache__/emploi_service.cpython-313.pyc +0 -0
- app/src/services/logements/__pycache__/logement_service.cpython-313.pyc +0 -0
- app/src/services/sante/__pycache__/sante_service.cpython-313.pyc +0 -0
- requirements.txt +5 -3
app/src/base/__pycache__/base_ia_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/base/__pycache__/base_ia_service.cpython-313.pyc and b/app/src/base/__pycache__/base_ia_service.cpython-313.pyc differ
|
|
|
app/src/router/__pycache__/chat_router.cpython-313.pyc
CHANGED
|
Binary files a/app/src/router/__pycache__/chat_router.cpython-313.pyc and b/app/src/router/__pycache__/chat_router.cpython-313.pyc differ
|
|
|
app/src/router/chat_router.py
CHANGED
|
@@ -45,20 +45,13 @@ async def analyze_document(
|
|
| 45 |
file: UploadFile = File(...),
|
| 46 |
question: str = Form(...)
|
| 47 |
):
|
| 48 |
-
# 1. Lire le fichier
|
| 49 |
contents = await file.read()
|
|
|
|
| 50 |
|
| 51 |
-
#
|
| 52 |
-
extracted_text = ocr_service.
|
| 53 |
|
| 54 |
-
#
|
| 55 |
-
|
| 56 |
-
{extracted_text}
|
| 57 |
-
|
| 58 |
-
Question : {question}
|
| 59 |
-
"""
|
| 60 |
-
|
| 61 |
-
# 4. Envoyer la requête au modèle NVIDIA
|
| 62 |
-
response = llm_service.ask_model(prompt)
|
| 63 |
|
| 64 |
return {"response": response}
|
|
|
|
| 45 |
file: UploadFile = File(...),
|
| 46 |
question: str = Form(...)
|
| 47 |
):
|
|
|
|
| 48 |
contents = await file.read()
|
| 49 |
+
filename = file.filename
|
| 50 |
|
| 51 |
+
# Extraire le texte
|
| 52 |
+
extracted_text = ocr_service.extract_text(contents, filename)
|
| 53 |
|
| 54 |
+
# Envoyer au modèle
|
| 55 |
+
response = llm_service.ask_model(extracted_text, question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
return {"response": response}
|
app/src/services/adminstrations/__pycache__/administration_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/services/adminstrations/__pycache__/administration_service.cpython-313.pyc and b/app/src/services/adminstrations/__pycache__/administration_service.cpython-313.pyc differ
|
|
|
app/src/services/documents/__pycache__/llm_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/services/documents/__pycache__/llm_service.cpython-313.pyc and b/app/src/services/documents/__pycache__/llm_service.cpython-313.pyc differ
|
|
|
app/src/services/documents/__pycache__/ocr_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/services/documents/__pycache__/ocr_service.cpython-313.pyc and b/app/src/services/documents/__pycache__/ocr_service.cpython-313.pyc differ
|
|
|
app/src/services/documents/llm_service.py
CHANGED
|
@@ -1,20 +1,29 @@
|
|
| 1 |
# services/llm_service.py
|
| 2 |
from ....src.base.nvidia_api_service import OpenClientService
|
| 3 |
-
|
| 4 |
class LLMService:
|
| 5 |
def __init__(self):
|
| 6 |
self.client = OpenClientService().get_client()
|
| 7 |
|
| 8 |
-
def ask_model(self,
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
response = self.client.chat.completions.create(
|
| 16 |
model="meta/llama3-8b-instruct",
|
| 17 |
-
messages=
|
|
|
|
|
|
|
|
|
|
| 18 |
temperature=0.4
|
| 19 |
)
|
| 20 |
return response.choices[0].message.content
|
|
|
|
|
|
| 1 |
# services/llm_service.py
|
| 2 |
from ....src.base.nvidia_api_service import OpenClientService
|
| 3 |
+
|
| 4 |
class LLMService:
|
| 5 |
def __init__(self):
|
| 6 |
self.client = OpenClientService().get_client()
|
| 7 |
|
| 8 |
+
def ask_model(self, extracted_text: str, user_question: str) -> str:
|
| 9 |
+
prompt = f"""
|
| 10 |
+
Voici un document dont voici le contenu :
|
| 11 |
+
|
| 12 |
+
\"\"\"
|
| 13 |
+
{extracted_text}
|
| 14 |
+
\"\"\"
|
| 15 |
|
| 16 |
+
L'utilisateur demande : {user_question}
|
| 17 |
+
|
| 18 |
+
Réponds de manière claire et pédagogique, comme si tu expliquais à quelqu’un qui découvre ce sujet.
|
| 19 |
+
"""
|
| 20 |
response = self.client.chat.completions.create(
|
| 21 |
model="meta/llama3-8b-instruct",
|
| 22 |
+
messages=[
|
| 23 |
+
{"role": "system", "content": "Tu es un assistant administratif, patient et pédagogue."},
|
| 24 |
+
{"role": "user", "content": prompt}
|
| 25 |
+
],
|
| 26 |
temperature=0.4
|
| 27 |
)
|
| 28 |
return response.choices[0].message.content
|
| 29 |
+
|
app/src/services/documents/ocr_service.py
CHANGED
|
@@ -1,15 +1,25 @@
|
|
| 1 |
-
|
| 2 |
-
import easyocr
|
| 3 |
import numpy as np
|
| 4 |
import cv2
|
| 5 |
-
import
|
| 6 |
|
| 7 |
class OCRService:
|
| 8 |
def __init__(self):
|
| 9 |
self.reader = easyocr.Reader(['fr', 'en'])
|
| 10 |
|
| 11 |
-
def
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pdf2image import convert_from_bytes
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
import cv2
|
| 4 |
+
import easyocr
|
| 5 |
|
| 6 |
class OCRService:
|
| 7 |
def __init__(self):
|
| 8 |
self.reader = easyocr.Reader(['fr', 'en'])
|
| 9 |
|
| 10 |
+
def extract_text(self, file_bytes: bytes, filename: str) -> str:
|
| 11 |
+
if filename.lower().endswith('.pdf'):
|
| 12 |
+
# PDF : on convertit chaque page en image
|
| 13 |
+
pages = convert_from_bytes(file_bytes)
|
| 14 |
+
all_text = []
|
| 15 |
+
for page in pages:
|
| 16 |
+
img = np.array(page)
|
| 17 |
+
text = self.reader.readtext(img, detail=0)
|
| 18 |
+
all_text.append("\n".join(text))
|
| 19 |
+
return "\n\n".join(all_text)
|
| 20 |
+
else:
|
| 21 |
+
# Image : lecture directe
|
| 22 |
+
np_img = np.frombuffer(file_bytes, np.uint8)
|
| 23 |
+
img = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
|
| 24 |
+
text = self.reader.readtext(img, detail=0)
|
| 25 |
+
return "\n".join(text)
|
app/src/services/droitsociaux/__pycache__/droitsociaux_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/services/droitsociaux/__pycache__/droitsociaux_service.cpython-313.pyc and b/app/src/services/droitsociaux/__pycache__/droitsociaux_service.cpython-313.pyc differ
|
|
|
app/src/services/education/__pycache__/education_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/services/education/__pycache__/education_service.cpython-313.pyc and b/app/src/services/education/__pycache__/education_service.cpython-313.pyc differ
|
|
|
app/src/services/emploi/__pycache__/emploi_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/services/emploi/__pycache__/emploi_service.cpython-313.pyc and b/app/src/services/emploi/__pycache__/emploi_service.cpython-313.pyc differ
|
|
|
app/src/services/logements/__pycache__/logement_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/services/logements/__pycache__/logement_service.cpython-313.pyc and b/app/src/services/logements/__pycache__/logement_service.cpython-313.pyc differ
|
|
|
app/src/services/sante/__pycache__/sante_service.cpython-313.pyc
CHANGED
|
Binary files a/app/src/services/sante/__pycache__/sante_service.cpython-313.pyc and b/app/src/services/sante/__pycache__/sante_service.cpython-313.pyc differ
|
|
|
requirements.txt
CHANGED
|
@@ -8,11 +8,13 @@ aiohttp
|
|
| 8 |
beautifulsoup4
|
| 9 |
openai
|
| 10 |
sounddevice
|
| 11 |
-
numpy
|
| 12 |
scipy
|
| 13 |
-
pandas
|
| 14 |
ffmpeg-python==0.2.0
|
| 15 |
future==1.0.0
|
| 16 |
SpeechRecognition
|
| 17 |
elevenlabs
|
| 18 |
-
easyocr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
beautifulsoup4
|
| 9 |
openai
|
| 10 |
sounddevice
|
|
|
|
| 11 |
scipy
|
|
|
|
| 12 |
ffmpeg-python==0.2.0
|
| 13 |
future==1.0.0
|
| 14 |
SpeechRecognition
|
| 15 |
elevenlabs
|
| 16 |
+
easyocr
|
| 17 |
+
pdf2image
|
| 18 |
+
opencv-python
|
| 19 |
+
numpy
|
| 20 |
+
pandas
|