coderg / perception_agent.py
prashantmatlani's picture
updated file_agent execute condition
5744898
Raw
History Blame Contribute Delete
5.51 kB
# ./perception_agent.py
import os
import pandas as pd
from docx import Document
from pypdf import PdfReader # Cleanly leverages your requirements.txt package
from groq import Groq
from agent_logging import log_agent_action
# Earlier vision model id, kept for reference only; the trailing note marks it as decommissioned.
#model = "llama-3.2-11b-vision-preview" # -> Decommissioned
# Model id sent with the vision chat-completion request in read_image_file.
model = "meta-llama/llama-4-scout-17b-16e-instruct"
# Module-level Groq client, created at import time; the API key is taken from
# the GROQ_API_KEY environment variable (os.getenv yields None when it is unset).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
def read_image_file(file_path):
    """Interpret an image (.png, .jpg, .bmp) through the Groq vision model.

    The file is read as bytes, base64-encoded, wrapped in a ``data:`` URL and
    sent together with a fixed extraction prompt using the module-level
    ``client`` and ``model``.

    Returns the model's reply under a header naming the file. If anything in
    the process raises, the error is logged and a bracketed error string is
    returned instead.
    """
    import base64

    try:
        log_agent_action("PERCEPTION", f"Encoding image for Vision API: {file_path}")
        with open(file_path, "rb") as image_file:
            raw_bytes = image_file.read()
        encoded_string = base64.b64encode(raw_bytes).decode('utf-8')

        # The MIME subtype is derived from the extension; "jpg" is mapped to
        # "jpeg", every other extension is used unchanged.
        ext = os.path.splitext(file_path)[1].lower().replace(".", "")
        if ext == "jpg":
            mime_type = "image/jpeg"
        else:
            mime_type = f"image/{ext}"

        prompt_part = {
            "type": "text",
            "text": "Analyze this technical image. Extract all code, data tables, structural diagrams, or text precisely.",
        }
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:{mime_type};base64,{encoded_string}"},
        }
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": [prompt_part, image_part]}],
            temperature=0.2,
        )

        log_agent_action("PERCEPTION_SUCCESS", f"Vision extraction complete for {file_path}")
        return f"\n--- Visual Content Extraction from {os.path.basename(file_path)} ---\n{response.choices[0].message.content}\n"
    except Exception as e:
        # Best-effort contract: failures are reported in the returned text.
        log_agent_action("PERCEPTION_ERROR", f"Vision interpretation failed: {str(e)}")
        return f"\n[Vision Error processing image {os.path.basename(file_path)}: {str(e)}]\n"
def read_document_file(file_path):
    """Convert a file into a text block, selecting a parser by its extension.

    Handling per (lower-cased) extension:
      * text/code (.txt, .md, .py, .json, .yaml, .toml, .css, .html): file
        contents read as UTF-8, ignoring undecodable bytes.
      * .xlsx / .xls: every sheet rendered as a markdown table.
      * .docx: paragraph texts joined by newlines.
      * .pdf: extracted text of each page that yields any, labelled with its
        1-based page number; a warning string if no page yields text.
      * .png / .jpg / .jpeg / .bmp: delegated to ``read_image_file``.
      * .doc and anything else: a bracketed notice, no content.

    Exceptions raised while parsing are caught, logged and reported in the
    returned string.
    """
    filename = os.path.basename(file_path)
    ext = os.path.splitext(file_path)[1].lower()

    try:
        # Plain text, markup and source files are passed through as-is.
        if ext in ('.txt', '.md', '.py', '.json', '.yaml', '.toml', '.css', '.html'):
            log_agent_action("PERCEPTION", f"Reading plaintext structure: {filename}")
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f"\n--- Content of File: {filename} ---\n{f.read()}\n"

        # Spreadsheets: sheet_name=None loads all sheets as {name: DataFrame}.
        if ext in ('.xlsx', '.xls'):
            log_agent_action("PERCEPTION", f"Parsing Data Spreadsheet: {filename}")
            workbook = pd.read_excel(file_path, sheet_name=None)
            sections = [f"\n--- Spreadsheet Matrix Extraction: {filename} ---\n"]
            for sheet_name, df in workbook.items():
                sections.append(f"\nSheet: {sheet_name}\n")
                sections.append(df.to_markdown(index=False) + "\n")
            return "".join(sections)

        # Word documents: only paragraph text is collected.
        if ext == '.docx':
            log_agent_action("PERCEPTION", f"Extracting structural Word paragraphs: {filename}")
            word_doc = Document(file_path)
            body_text = "\n".join(p.text for p in word_doc.paragraphs)
            return f"\n--- Document Text Extraction: {filename} ---\n" + body_text + "\n"

        # PDFs: keep only pages whose text extraction is non-empty.
        if ext == '.pdf':
            log_agent_action("PERCEPTION", f"Initializing pypdf reader pipeline: {filename}")
            page_chunks = []
            for page_number, page in enumerate(PdfReader(file_path).pages, start=1):
                page_text = page.extract_text()
                if not page_text:
                    continue
                page_chunks.append(f"--- Page {page_number} ---\n{page_text}")

            if page_chunks:
                log_agent_action("PERCEPTION_SUCCESS", f"Successfully parsed {len(page_chunks)} pages from {filename}")
                return f"\n--- PDF Document Content Ingestion: {filename} ---\n" + "\n".join(page_chunks) + "\n"

            # No page produced text, so the PDF is reported as a likely scan.
            log_agent_action("PERCEPTION_WARN", f"PDF contained no raw text layers (possible raw scan): {filename}")
            return f"\n[System Warning: '{filename}' appears to be an un-OCRed scanned image PDF. Please extract its pages as raw images for CoderG's Vision layer.]\n"

        # Raster images go through the vision reader.
        if ext in ('.png', '.jpg', '.jpeg', '.bmp'):
            return read_image_file(file_path)

        # Legacy Word format is rejected with a conversion hint.
        if ext == '.doc':
            log_agent_action("PERCEPTION_WARN", f"Legacy format encountered: {filename}")
            return f"\n[System Error: Legacy format '{ext}' detected. Please convert '{filename}' to '.docx' for automated ingestion.]\n"

        # Everything else is skipped with a notice.
        log_agent_action("PERCEPTION_WARN", f"Unknown asset extension skipped: {filename}")
        return f"\n[System Warning: Unsupported file format '{ext}' for file '{filename}'. Skipping content ingestion.]\n"
    except Exception as e:
        log_agent_action("PERCEPTION_ERROR", f"Failed parsing {filename}: {str(e)}")
        return f"\n[Error processing document asset {filename}: {str(e)}]\n"