Spaces:
Sleeping
Sleeping
Update agents/document_agent.py
Browse files- agents/document_agent.py +43 -3
agents/document_agent.py
CHANGED
|
@@ -13,6 +13,10 @@ import google.generativeai as genai
|
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
from datetime import datetime
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
from agents.base_agent import BaseAgent
|
| 17 |
from state import (
|
| 18 |
InvoiceProcessingState, InvoiceData, ItemDetail,
|
|
@@ -114,6 +118,8 @@ class DocumentAgent(BaseAgent):
|
|
| 114 |
genai.configure(api_key=self.api_key)
|
| 115 |
# genai.configure(api_key=os.getenv("GEMINI_API_KEY_7"))
|
| 116 |
self.model = genai.GenerativeModel("gemini-2.5-flash")
|
|
|
|
|
|
|
| 117 |
|
| 118 |
def generate(self, prompt):
|
| 119 |
try:
|
|
@@ -175,6 +181,38 @@ class DocumentAgent(BaseAgent):
|
|
| 175 |
self._should_escalate(state, reason=str(e))
|
| 176 |
return state
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
async def _extract_text_from_pdf(self, file_name: str) -> str:
|
| 180 |
# pass
|
|
@@ -192,9 +230,11 @@ class DocumentAgent(BaseAgent):
|
|
| 192 |
with pdfplumber.open(file_name) as pdf:
|
| 193 |
for page in pdf.pages:
|
| 194 |
text += page.extract_text() or ""
|
| 195 |
-
except Exception
|
| 196 |
-
self.logger.logger.
|
| 197 |
-
|
|
|
|
|
|
|
| 198 |
return text
|
| 199 |
|
| 200 |
async def _parse_invoice_with_ai(self, text: str) -> InvoiceData:
|
|
|
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
from datetime import datetime
|
| 15 |
|
| 16 |
+
from paddleocr import PaddleOCR
|
| 17 |
+
import io
|
| 18 |
+
from PIL import Image
|
| 19 |
+
|
| 20 |
from agents.base_agent import BaseAgent
|
| 21 |
from state import (
|
| 22 |
InvoiceProcessingState, InvoiceData, ItemDetail,
|
|
|
|
| 118 |
genai.configure(api_key=self.api_key)
|
| 119 |
# genai.configure(api_key=os.getenv("GEMINI_API_KEY_7"))
|
| 120 |
self.model = genai.GenerativeModel("gemini-2.5-flash")
|
| 121 |
+
# Initialize PaddleOCR (English example)
|
| 122 |
+
self.ocr = PaddleOCR(use_angle_cls=True, lang="en")
|
| 123 |
|
| 124 |
def generate(self, prompt):
|
| 125 |
try:
|
|
|
|
| 181 |
self._should_escalate(state, reason=str(e))
|
| 182 |
return state
|
| 183 |
|
| 184 |
+
async def _extract_with_paddle_ocr(self, file_name: str) -> str:
    """Extract text from a PDF by rasterizing each page and running PaddleOCR.

    Every page is rendered to a 300 DPI PNG, converted to a NumPy array and
    passed to ``self.ocr``. Recognized fragments are joined with a single
    space; a newline is appended after each entry of the OCR result.

    Failures are logged and swallowed so this method can act as a
    best-effort fallback: whatever text was collected before the error is
    still returned.

    Args:
        file_name: Path of the PDF document to OCR.

    Returns:
        The recognized text, or the partial text gathered before a failure
        (possibly an empty string).
    """
    # NOTE(review): `fitz` and `np` are not among the imports visible in this
    # change; presumably they are imported at module level - confirm.
    # NOTE(review): this coroutine never awaits, so rendering and OCR run
    # synchronously on the caller's event loop.
    pieces = []

    try:
        # The context manager closes the document even when rendering or
        # OCR raises part-way through.
        with fitz.open(file_name) as doc:
            for page in doc:
                # Convert PDF page to image
                pix = page.get_pixmap(dpi=300)
                img = Image.open(io.BytesIO(pix.tobytes("png")))
                img_np = np.array(img)

                # Run OCR
                result = self.ocr.ocr(img_np, cls=True)

                # Extract text lines. An entry may be None when no text was
                # detected on the image; treat it as empty instead of
                # raising and abandoning the remaining pages.
                for line in result or []:
                    for word_info in line or []:
                        pieces.append(word_info[1][0] + " ")
                    pieces.append("\n")

        self.logger.logger.info("[DocumentAgent] PaddleOCR extraction completed.")

    except Exception as e:
        # Deliberate best-effort: report the failure and return what we have.
        self.logger.logger.error(f"[DocumentAgent] PaddleOCR failed: {e}")

    return "".join(pieces)
|
| 215 |
+
|
| 216 |
|
| 217 |
async def _extract_text_from_pdf(self, file_name: str) -> str:
|
| 218 |
# pass
|
|
|
|
| 230 |
with pdfplumber.open(file_name) as pdf:
|
| 231 |
for page in pdf.pages:
|
| 232 |
text += page.extract_text() or ""
|
| 233 |
+
except Exception:
|
| 234 |
+
self.logger.logger.info("[DocumentAgent] Falling back to PaddleOCR...")
|
| 235 |
+
|
| 236 |
+
# Final fallback → PaddleOCR
|
| 237 |
+
text = await self._extract_with_paddle_ocr(file_name)
|
| 238 |
return text
|
| 239 |
|
| 240 |
async def _parse_invoice_with_ai(self, text: str) -> InvoiceData:
|