Spaces:
Sleeping
Sleeping
| # ========================================= | |
| # information_extraction.py | |
| # CrewAI + OpenAI Vision Logic | |
| # ========================================= | |
| import base64 | |
| from crewai import Agent, Task, Crew, Process | |
| from crewai.tools import tool | |
| from langchain_openai import ChatOpenAI | |
| from openai import OpenAI | |
| # Module-level OpenAI client, created once at import time and shared by read_invoice_image for its vision request | |
| vision_client = OpenAI() | |
| # ----------------------------------------- | |
| # Vision Tool | |
| # ----------------------------------------- | |
def read_invoice_image(image_path: str) -> str:
    """
    Read an invoice image and extract its raw invoice text
    using OpenAI Vision (Responses API).

    The file is base64-encoded and sent inline as a data URL together
    with a prompt asking for vendor name, tax id, invoice number,
    invoice date, the items table and the total gross.

    Args:
        image_path: Path to the invoice image file to read.

    Returns:
        The text produced by the model (``response.output_text``).

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    import mimetypes

    # Label the data URL with the file's actual image type instead of
    # always claiming JPEG; fall back to JPEG when the extension is
    # unknown or does not map to an image type.
    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    with open(image_path, "rb") as f:
        image_base64 = base64.b64encode(f.read()).decode("utf-8")

    response = vision_client.responses.create(
        model="gpt-4.1-mini",
        input=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_text",
                        "text": (
                            "Extract vendor name, tax id, invoice number, "
                            "invoice date, items table (description, quantity, net price), "
                            "and total gross from this invoice."
                        ),
                    },
                    {
                        "type": "input_image",
                        "image_url": f"data:{mime_type};base64,{image_base64}",
                    },
                ],
            }
        ],
    )
    return response.output_text
| # ----------------------------------------- | |
| # Main Extraction Function | |
| # ----------------------------------------- | |
def extract_invoice(image_path: str):
    """
    Main entry point used by Gradio / API.

    Builds a two-step sequential CrewAI pipeline: an OCR agent reads the
    invoice image through the "Invoice Image Reader" tool, then a
    formatter agent converts the extracted text into JSON.

    Args:
        image_path: Path to the invoice image; it is embedded in the
            first task's description so the agent can pass it to the tool.

    Returns:
        Whatever ``Crew.kickoff()`` returns for the final (formatting) task.
    """
    from crewai.tools import BaseTool

    # Agents expect CrewAI tool objects, not bare functions. Wrap the
    # reader under the name the prompts refer to, unless it has already
    # been turned into a tool elsewhere.
    if isinstance(read_invoice_image, BaseTool):
        invoice_reader = read_invoice_image
    else:
        invoice_reader = tool("Invoice Image Reader")(read_invoice_image)

    llm = ChatOpenAI(
        model="gpt-4.1-mini",
        temperature=0,
    )

    # Agent 1: OCR
    visual_reader = Agent(
        role="OCR Specialist",
        goal="Extract invoice data from images",
        backstory=(
            "You cannot see images directly. "
            "You must ALWAYS use the Invoice Image Reader tool."
        ),
        tools=[invoice_reader],
        llm=llm,
        verbose=True,
    )

    # Agent 2: JSON Formatter
    json_architect = Agent(
        role="Data Engineer",
        goal="Convert extracted invoice text into structured JSON",
        backstory="You normalize numbers and dates and output strict JSON.",
        llm=llm,
        verbose=False,
    )

    # Task 1: read the image via the tool
    extraction_task = Task(
        description=(
            "Use the Invoice Image Reader tool to read the invoice image "
            f"at path '{image_path}'. Extract vendor, tax id, invoice number, "
            "date, item rows, and total gross."
        ),
        expected_output="Structured invoice text.",
        agent=visual_reader,
    )

    # Task 2: format the extracted text; receives Task 1's output as context
    formatting_task = Task(
        description=(
            "Convert the extracted invoice text into JSON:\n\n"
            "{\n"
            " 'invoice_no': str,\n"
            " 'date': 'YYYY-MM-DD',\n"
            " 'vendor': {'name': str, 'tax_id': str},\n"
            " 'items': [{'desc': str, 'qty': float, 'net': float}],\n"
            " 'total_gross': float\n"
            "}\n\n"
            "Rules:\n"
            "- Replace commas with dots in numbers\n"
            "- Output ONLY valid JSON\n"
            "- Use null if missing"
        ),
        expected_output="Valid JSON only.",
        agent=json_architect,
        context=[extraction_task],
    )

    crew = Crew(
        agents=[visual_reader, json_architect],
        tasks=[extraction_task, formatting_task],
        process=Process.sequential,
        verbose=True,
    )
    return crew.kickoff()