# Info / information_extraction.py
# sujataprakashdatycs's picture
# Update information_extraction.py
# 1e0ce5e verified
# =========================================
# information_extraction.py
# CrewAI + OpenAI Vision Logic
# =========================================
import base64
from crewai import Agent, Task, Crew, Process
from crewai.tools import tool
from langchain_openai import ChatOpenAI
from openai import OpenAI
# OpenAI client for vision
# Module-level client shared by read_invoice_image; it is constructed with no
# explicit arguments when this module is imported.
# NOTE(review): presumably the API key is picked up from the environment
# (e.g. OPENAI_API_KEY) -- confirm it is set before this module is imported.
vision_client = OpenAI()
# -----------------------------------------
# Vision Tool
# -----------------------------------------
@tool("Invoice Image Reader")
def read_invoice_image(image_path: str) -> str:
    """
    Reads an invoice image and extracts raw invoice text
    using OpenAI Vision (Responses API).
    """
    # NOTE(review): the docstring above is presumably surfaced to the agent as
    # the tool description by crewai's @tool -- confirm before rewording it.
    # For that reason the developer-facing contract lives in these comments:
    #   image_path: filesystem path of the invoice image to read.
    #   returns:    the model's text answer (response.output_text).
    #   raises:     OSError (e.g. FileNotFoundError) if the file cannot be
    #               opened; errors from the OpenAI client propagate unchanged.
    import mimetypes  # local import keeps this block self-contained

    # Label the payload with the file's actual image type instead of always
    # claiming JPEG; fall back to the previous "image/jpeg" when the extension
    # is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # The image is sent inline as a base64 data URL.
    with open(image_path, "rb") as f:
        image_base64 = base64.b64encode(f.read()).decode("utf-8")

    response = vision_client.responses.create(
        model="gpt-4.1-mini",
        input=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_text",
                        "text": (
                            "Extract vendor name, tax id, invoice number, "
                            "invoice date, items table (description, quantity, net price), "
                            "and total gross from this invoice."
                        ),
                    },
                    {
                        "type": "input_image",
                        "image_url": f"data:{mime_type};base64,{image_base64}",
                    },
                ],
            }
        ],
    )
    return response.output_text
# -----------------------------------------
# Main Extraction Function
# -----------------------------------------
def extract_invoice(image_path: str):
    """
    Run the two-step invoice crew on one image (entry point for Gradio / API).

    A first agent is told to read the image at ``image_path`` through the
    Invoice Image Reader tool; a second agent turns that text into JSON.
    The value returned is whatever ``Crew.kickoff()`` returns.
    """
    # Prompt text is assembled up front so the wiring below stays compact.
    ocr_instructions = (
        "Use the Invoice Image Reader tool to read the invoice image "
        f"at path '{image_path}'. Extract vendor, tax id, invoice number, "
        "date, item rows, and total gross."
    )
    json_instructions = (
        "Convert the extracted invoice text into JSON:\n\n"
        "{\n"
        " 'invoice_no': str,\n"
        " 'date': 'YYYY-MM-DD',\n"
        " 'vendor': {'name': str, 'tax_id': str},\n"
        " 'items': [{'desc': str, 'qty': float, 'net': float}],\n"
        " 'total_gross': float\n"
        "}\n\n"
        "Rules:\n"
        "- Replace commas with dots in numbers\n"
        "- Output ONLY valid JSON\n"
        "- Use null if missing"
    )

    # Both agents share one chat model configured with temperature 0.
    chat_model = ChatOpenAI(model="gpt-4.1-mini", temperature=0)

    # Agent 1: reads the image, and is the only agent given the vision tool.
    ocr_agent = Agent(
        role="OCR Specialist",
        goal="Extract invoice data from images",
        backstory=(
            "You cannot see images directly. "
            "You must ALWAYS use the Invoice Image Reader tool."
        ),
        tools=[read_invoice_image],
        llm=chat_model,
        verbose=True,
    )

    # Agent 2: reshapes the extracted text into JSON.
    formatter_agent = Agent(
        role="Data Engineer",
        goal="Convert extracted invoice text into structured JSON",
        backstory="You normalize numbers and dates and output strict JSON.",
        llm=chat_model,
        verbose=False,
    )

    read_step = Task(
        description=ocr_instructions,
        expected_output="Structured invoice text.",
        agent=ocr_agent,
    )

    # The formatting step receives the reading step as its context.
    format_step = Task(
        description=json_instructions,
        expected_output="Valid JSON only.",
        agent=formatter_agent,
        context=[read_step],
    )

    pipeline = Crew(
        agents=[ocr_agent, formatter_agent],
        tasks=[read_step, format_step],
        process=Process.sequential,
        verbose=True,
    )
    return pipeline.kickoff()