Spaces:

bambadij
/

oyana

Sleeping

App Files Files Community

Bambo TRAORE [SNT DRPS/PEX/SDN] committed on May 24, 2025

Commit

096b6ed

1 Parent(s): f858a0a

add ocr

Browse files

Files changed (14) hide show

app/src/base/__pycache__/base_ia_service.cpython-313.pyc +0 -0
app/src/router/__pycache__/chat_router.cpython-313.pyc +0 -0
app/src/router/chat_router.py +5 -12
app/src/services/adminstrations/__pycache__/administration_service.cpython-313.pyc +0 -0
app/src/services/documents/__pycache__/llm_service.cpython-313.pyc +0 -0
app/src/services/documents/__pycache__/ocr_service.cpython-313.pyc +0 -0
app/src/services/documents/llm_service.py +17 -8
app/src/services/documents/ocr_service.py +18 -8
app/src/services/droitsociaux/__pycache__/droitsociaux_service.cpython-313.pyc +0 -0
app/src/services/education/__pycache__/education_service.cpython-313.pyc +0 -0
app/src/services/emploi/__pycache__/emploi_service.cpython-313.pyc +0 -0
app/src/services/logements/__pycache__/logement_service.cpython-313.pyc +0 -0
app/src/services/sante/__pycache__/sante_service.cpython-313.pyc +0 -0
requirements.txt +5 -3

app/src/base/__pycache__/base_ia_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/base/__pycache__/base_ia_service.cpython-313.pyc and b/app/src/base/__pycache__/base_ia_service.cpython-313.pyc differ

app/src/router/__pycache__/chat_router.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/router/__pycache__/chat_router.cpython-313.pyc and b/app/src/router/__pycache__/chat_router.cpython-313.pyc differ

app/src/router/chat_router.py CHANGED Viewed

@@ -45,20 +45,13 @@ async def analyze_document(
     file: UploadFile = File(...),
     question: str = Form(...)
 ):
-    # 1. Lire le fichier
     contents = await file.read()
-    # 2. Extraire le texte via OCR
-    extracted_text = ocr_service.extract_text_from_image(contents)
-    # 3. Construire le prompt
-    prompt = f"""Voici un document administratif. Voici les informations extraites :
-    {extracted_text}
-    Question : {question}
-    """
-    # 4. Envoyer la requête au modèle NVIDIA
-    response = llm_service.ask_model(prompt)
     return {"response": response}

     file: UploadFile = File(...),
     question: str = Form(...)
 ):
     contents = await file.read()
+    filename = file.filename
+    # Extraire le texte
+    extracted_text = ocr_service.extract_text(contents, filename)
+    # Envoyer au modèle
+    response = llm_service.ask_model(extracted_text, question)
     return {"response": response}

app/src/services/adminstrations/__pycache__/administration_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/services/adminstrations/__pycache__/administration_service.cpython-313.pyc and b/app/src/services/adminstrations/__pycache__/administration_service.cpython-313.pyc differ

app/src/services/documents/__pycache__/llm_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/services/documents/__pycache__/llm_service.cpython-313.pyc and b/app/src/services/documents/__pycache__/llm_service.cpython-313.pyc differ

app/src/services/documents/__pycache__/ocr_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/services/documents/__pycache__/ocr_service.cpython-313.pyc and b/app/src/services/documents/__pycache__/ocr_service.cpython-313.pyc differ

app/src/services/documents/llm_service.py CHANGED Viewed

@@ -1,20 +1,29 @@
 # services/llm_service.py
 from ....src.base.nvidia_api_service import OpenClientService
 class LLMService:
     def __init__(self):
         self.client = OpenClientService().get_client()
-    def ask_model(self, prompt: str, historique=None) -> str:
-        # Construction des messages (avec historique si présent)
-        messages = [{"role": "system", "content": "Tu es un assistant administratif."}]
-        if historique:
-            messages.extend(historique)
-        messages.append({"role": "user", "content": prompt})
         response = self.client.chat.completions.create(
             model="meta/llama3-8b-instruct",
-            messages=messages,
             temperature=0.4
         )
         return response.choices[0].message.content

 # services/llm_service.py
 from ....src.base.nvidia_api_service import OpenClientService
 class LLMService:
     def __init__(self):
         self.client = OpenClientService().get_client()
+    def ask_model(self, extracted_text: str, user_question: str) -> str:
+        prompt = f"""
+Voici un document dont voici le contenu :
+\"\"\"
+{extracted_text}
+\"\"\"
+L'utilisateur demande : {user_question}
+Réponds de manière claire et pédagogique, comme si tu expliquais à quelqu’un qui découvre ce sujet.
+"""
         response = self.client.chat.completions.create(
             model="meta/llama3-8b-instruct",
+            messages=[
+                {"role": "system", "content": "Tu es un assistant administratif, patient et pédagogue."},
+                {"role": "user", "content": prompt}
+            ],
             temperature=0.4
         )
         return response.choices[0].message.content

app/src/services/documents/ocr_service.py CHANGED Viewed

@@ -1,15 +1,25 @@
-# services/ocr_service.py
-import easyocr
 import numpy as np
 import cv2
-import io
 class OCRService:
     def __init__(self):
         self.reader = easyocr.Reader(['fr', 'en'])
-    def extract_text_from_image(self, image_bytes: bytes) -> str:
-        np_img = np.frombuffer(image_bytes, np.uint8)
-        img = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
-        result = self.reader.readtext(img, detail=0)
-        return "\n".join(result)

+from pdf2image import convert_from_bytes
 import numpy as np
 import cv2
+import easyocr
 class OCRService:
     def __init__(self):
         self.reader = easyocr.Reader(['fr', 'en'])
+    def extract_text(self, file_bytes: bytes, filename: str) -> str:
+        if filename.lower().endswith('.pdf'):
+            # PDF : on convertit chaque page en image
+            pages = convert_from_bytes(file_bytes)
+            all_text = []
+            for page in pages:
+                img = np.array(page)
+                text = self.reader.readtext(img, detail=0)
+                all_text.append("\n".join(text))
+            return "\n\n".join(all_text)
+        else:
+            # Image : lecture directe
+            np_img = np.frombuffer(file_bytes, np.uint8)
+            img = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
+            text = self.reader.readtext(img, detail=0)
+            return "\n".join(text)

app/src/services/droitsociaux/__pycache__/droitsociaux_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/services/droitsociaux/__pycache__/droitsociaux_service.cpython-313.pyc and b/app/src/services/droitsociaux/__pycache__/droitsociaux_service.cpython-313.pyc differ

app/src/services/education/__pycache__/education_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/services/education/__pycache__/education_service.cpython-313.pyc and b/app/src/services/education/__pycache__/education_service.cpython-313.pyc differ

app/src/services/emploi/__pycache__/emploi_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/services/emploi/__pycache__/emploi_service.cpython-313.pyc and b/app/src/services/emploi/__pycache__/emploi_service.cpython-313.pyc differ

app/src/services/logements/__pycache__/logement_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/services/logements/__pycache__/logement_service.cpython-313.pyc and b/app/src/services/logements/__pycache__/logement_service.cpython-313.pyc differ

app/src/services/sante/__pycache__/sante_service.cpython-313.pyc CHANGED Viewed

Binary files a/app/src/services/sante/__pycache__/sante_service.cpython-313.pyc and b/app/src/services/sante/__pycache__/sante_service.cpython-313.pyc differ

requirements.txt CHANGED Viewed

@@ -8,11 +8,13 @@ aiohttp
 beautifulsoup4
 openai
 sounddevice
-numpy
 scipy
-pandas
 ffmpeg-python==0.2.0
 future==1.0.0
 SpeechRecognition
 elevenlabs
-easyocr

 beautifulsoup4
 openai
 sounddevice
 scipy
 ffmpeg-python==0.2.0
 future==1.0.0
 SpeechRecognition
 elevenlabs
+easyocr
+pdf2image
+opencv-python
+numpy
+pandas