Spaces:

sujataprakashdatycs
/

Info

Sleeping

App Files Files Community

Info / information_extraction.py

sujataprakashdatycs

Update information_extraction.py

1e0ce5e verified 2 months ago

raw

history blame contribute delete

3.76 kB

	# =========================================
	# information_extraction.py
	# CrewAI + OpenAI Vision Logic
	# =========================================

	import base64
	from crewai import Agent, Task, Crew, Process
	from crewai.tools import tool
	from langchain_openai import ChatOpenAI
	from openai import OpenAI

	# OpenAI client for vision
	# Module-level client shared by the Invoice Image Reader tool below; it is
	# constructed once, at import time, with no explicit arguments.
	# NOTE(review): presumably credentials come from the environment
	# (e.g. OPENAI_API_KEY) -- confirm against the deployment configuration.
	vision_client = OpenAI()

	# -----------------------------------------
	# Vision Tool
	# -----------------------------------------
	# Tool: send one invoice image to the OpenAI Responses API and return the
	# model's text answer.
	#   image_path -- filesystem path of the image; the file is read in binary
	#                 mode and embedded in the request as a base64 data URL.
	#   returns    -- `output_text` of the API response (a plain string).
	#   raises     -- whatever `open()` raises for an unreadable path (OSError
	#                 family); errors from the OpenAI client are not caught here.
	# NOTE(review): the docstring below is presumably used by the @tool decorator
	# as the description shown to the agent, so its wording is kept unchanged.
	@tool("Invoice Image Reader")
	def read_invoice_image(image_path: str) -> str:
	    """
	    Reads an invoice image and extracts raw invoice text
	    using OpenAI Vision (Responses API).
	    """
	    # Local import keeps this block self-contained (stdlib only).
	    import mimetypes

	    # Label the data URL with the file's real image type instead of always
	    # claiming JPEG; fall back to JPEG when the extension is unknown or is
	    # not an image type, which matches the previous behaviour.
	    guessed_type, _ = mimetypes.guess_type(image_path)
	    if guessed_type and guessed_type.startswith("image/"):
	        mime_type = guessed_type
	    else:
	        mime_type = "image/jpeg"

	    with open(image_path, "rb") as f:
	        image_base64 = base64.b64encode(f.read()).decode("utf-8")

	    response = vision_client.responses.create(
	        model="gpt-4.1-mini",
	        input=[
	            {
	                "role": "user",
	                "content": [
	                    {
	                        "type": "input_text",
	                        "text": (
	                            "Extract vendor name, tax id, invoice number, "
	                            "invoice date, items table (description, quantity, net price), "
	                            "and total gross from this invoice."
	                        ),
	                    },
	                    {
	                        "type": "input_image",
	                        "image_url": f"data:{mime_type};base64,{image_base64}",
	                    },
	                ],
	            }
	        ],
	    )

	    return response.output_text


	# -----------------------------------------
	# Main Extraction Function
	# -----------------------------------------
	def extract_invoice(image_path: str):
	    """
	    Main entry point used by Gradio / API.

	    Builds a two-agent crew for a single invoice image: an "OCR Specialist"
	    that is handed the Invoice Image Reader tool and told to read the file at
	    ``image_path``, followed by a "Data Engineer" that converts the extracted
	    text into JSON. The two tasks run sequentially and the value returned by
	    ``Crew.kickoff()`` is passed straight back to the caller.
	    """
	    chat_model = ChatOpenAI(model="gpt-4.1-mini", temperature=0)

	    # ---- prompt text, kept together so the wiring below stays short ----
	    ocr_backstory = (
	        "You cannot see images directly. "
	        "You must ALWAYS use the Invoice Image Reader tool."
	    )
	    read_instructions = (
	        f"Use the Invoice Image Reader tool to read the invoice image "
	        f"at path '{image_path}'. Extract vendor, tax id, invoice number, "
	        f"date, item rows, and total gross."
	    )
	    json_instructions = (
	        "Convert the extracted invoice text into JSON:\n\n"
	        "{\n"
	        " 'invoice_no': str,\n"
	        " 'date': 'YYYY-MM-DD',\n"
	        " 'vendor': {'name': str, 'tax_id': str},\n"
	        " 'items': [{'desc': str, 'qty': float, 'net': float}],\n"
	        " 'total_gross': float\n"
	        "}\n\n"
	        "Rules:\n"
	        "- Replace commas with dots in numbers\n"
	        "- Output ONLY valid JSON\n"
	        "- Use null if missing"
	    )

	    # ---- agents ----
	    # First agent: the only one given the image-reading tool.
	    ocr_agent = Agent(
	        role="OCR Specialist",
	        goal="Extract invoice data from images",
	        backstory=ocr_backstory,
	        tools=[read_invoice_image],
	        llm=chat_model,
	        verbose=True,
	    )
	    # Second agent: no tools, works purely on the first task's output.
	    formatter_agent = Agent(
	        role="Data Engineer",
	        goal="Convert extracted invoice text into structured JSON",
	        backstory="You normalize numbers and dates and output strict JSON.",
	        llm=chat_model,
	        verbose=False,
	    )

	    # ---- tasks ----
	    read_task = Task(
	        description=read_instructions,
	        expected_output="Structured invoice text.",
	        agent=ocr_agent,
	    )
	    # The formatting task receives the reading task as its context.
	    json_task = Task(
	        description=json_instructions,
	        expected_output="Valid JSON only.",
	        agent=formatter_agent,
	        context=[read_task],
	    )

	    # ---- run both tasks in order ----
	    pipeline = Crew(
	        agents=[ocr_agent, formatter_agent],
	        tasks=[read_task, json_task],
	        process=Process.sequential,
	        verbose=True,
	    )
	    return pipeline.kickoff()