# ./perception_agent.py
"""Perception layer: convert files on disk into labelled text blocks.

``read_document_file`` routes a path to a format-specific reader based on its
extension; ``read_image_file`` sends raster images to the Groq vision model.
Both functions return a string in every handled case: failures are logged via
``log_agent_action`` and reported as a bracketed message in the returned text.
"""
import base64
import os

import pandas as pd
from docx import Document
from groq import Groq
from pypdf import PdfReader  # Cleanly leverages your requirements.txt package

from agent_logging import log_agent_action

# NOTE: "llama-3.2-11b-vision-preview" was used here previously; it has been
# decommissioned.
model = "meta-llama/llama-4-scout-17b-16e-instruct"
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Extension groups used by read_document_file (compared against the lowercased
# extension, leading dot included).
_PLAINTEXT_EXTENSIONS = frozenset(
    {'.txt', '.md', '.py', '.json', '.yaml', '.toml', '.css', '.html'}
)
_SPREADSHEET_EXTENSIONS = frozenset({'.xlsx', '.xls'})
_IMAGE_EXTENSIONS = frozenset({'.png', '.jpg', '.jpeg', '.bmp'})


def read_image_file(file_path: str) -> str:
    """Uses Groq Vision capability to interpret images (.png, .jpg, .bmp).

    The file is read as bytes, base64-encoded, and sent to the chat
    completions endpoint as a ``data:`` URL together with a fixed extraction
    prompt.

    Args:
        file_path: Path to the image file.

    Returns:
        The model's extraction wrapped in a header naming the file, or a
        bracketed ``[Vision Error ...]`` message if reading the file or the
        API call fails.
    """
    image_name = os.path.basename(file_path)
    try:
        log_agent_action("PERCEPTION", f"Encoding image for Vision API: {file_path}")
        with open(file_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode("utf-8")

        # The MIME subtype is the bare extension, except ".jpg" which must be
        # spelled "jpeg".
        ext = os.path.splitext(file_path)[1].lower().lstrip(".")
        mime_type = "image/jpeg" if ext == "jpg" else f"image/{ext}"

        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Analyze this technical image. Extract all code, data tables, structural diagrams, or text precisely.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{encoded_string}"
                            },
                        },
                    ],
                }
            ],
            temperature=0.2,
        )

        # The message content may be None; normalise it so the literal text
        # "None" is never embedded in the extraction block.
        extracted = response.choices[0].message.content or ""

        log_agent_action("PERCEPTION_SUCCESS", f"Vision extraction complete for {file_path}")
        return f"\n--- Visual Content Extraction from {image_name} ---\n{extracted}\n"
    except Exception as e:
        # Best-effort boundary: report the failure in-band instead of raising.
        log_agent_action("PERCEPTION_ERROR", f"Vision interpretation failed: {e}")
        return f"\n[Vision Error processing image {image_name}: {e}]\n"


def _read_plaintext(file_path: str, filename: str) -> str:
    """Return the raw text of a plaintext/code file under a content header."""
    log_agent_action("PERCEPTION", f"Reading plaintext structure: {filename}")
    # errors="ignore" drops undecodable bytes rather than failing the read.
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        return f"\n--- Content of File: {filename} ---\n{f.read()}\n"


def _read_spreadsheet(file_path: str, filename: str) -> str:
    """Render every sheet of an Excel workbook as a table, one section per sheet."""
    log_agent_action("PERCEPTION", f"Parsing Data Spreadsheet: {filename}")
    # sheet_name=None loads all sheets as a {sheet name: DataFrame} mapping.
    excel_data = pd.read_excel(file_path, sheet_name=None)

    sections = [f"\n--- Spreadsheet Matrix Extraction: {filename} ---\n"]
    for sheet_name, df in excel_data.items():
        try:
            table = df.to_markdown(index=False)
        except ImportError:
            # to_markdown needs the optional "tabulate" package; fall back to
            # plain fixed-width text so the sheet data is still ingested.
            table = df.to_string(index=False)
        sections.append(f"\nSheet: {sheet_name}\n")
        sections.append(table + "\n")
    return "".join(sections)


def _read_docx(file_path: str, filename: str) -> str:
    """Return the paragraph text of a .docx file, one paragraph per line."""
    log_agent_action("PERCEPTION", f"Extracting structural Word paragraphs: {filename}")
    doc = Document(file_path)
    # NOTE(review): only doc.paragraphs is read here; confirm whether text
    # held in tables also needs to be ingested.
    paragraphs = [p.text for p in doc.paragraphs]
    return f"\n--- Document Text Extraction: {filename} ---\n" + "\n".join(paragraphs) + "\n"


def _read_pdf(file_path: str, filename: str) -> str:
    """Return the text layer of a PDF, page by page.

    Pages for which no text is extracted are skipped; page labels keep the
    1-based position of the page in the document. If no page yields text, a
    warning message is returned instead.
    """
    log_agent_action("PERCEPTION", f"Initializing pypdf reader pipeline: {filename}")
    reader = PdfReader(file_path)

    pdf_text_buffer = []
    for page_number, page in enumerate(reader.pages, start=1):
        page_text = page.extract_text()
        if page_text:
            pdf_text_buffer.append(f"--- Page {page_number} ---\n{page_text}")

    if not pdf_text_buffer:
        log_agent_action(
            "PERCEPTION_WARN",
            f"PDF contained no raw text layers (possible raw scan): {filename}",
        )
        return f"\n[System Warning: '{filename}' appears to be an un-OCRed scanned image PDF. Please extract its pages as raw images for CoderG's Vision layer.]\n"

    log_agent_action(
        "PERCEPTION_SUCCESS",
        f"Successfully parsed {len(pdf_text_buffer)} pages from {filename}",
    )
    return f"\n--- PDF Document Content Ingestion: {filename} ---\n" + "\n".join(pdf_text_buffer) + "\n"


def read_document_file(file_path: str) -> str:
    """Universal router parsing text, code, spreadsheets, PDFs, and document assets.

    The file's lowercased extension selects the reader. Legacy ``.doc`` files
    and unknown extensions are not parsed; a bracketed notice is returned for
    them instead.

    Args:
        file_path: Path to the file to ingest.

    Returns:
        The extracted content under a header naming the file, or a bracketed
        warning/error message when the format is unsupported or parsing fails.
    """
    ext = os.path.splitext(file_path)[1].lower()
    filename = os.path.basename(file_path)

    try:
        # 1. Plain Text and Markdown Layouts
        if ext in _PLAINTEXT_EXTENSIONS:
            return _read_plaintext(file_path, filename)

        # 2. Excel Data Configurations
        if ext in _SPREADSHEET_EXTENSIONS:
            return _read_spreadsheet(file_path, filename)

        # 3. Microsoft Word Processing
        if ext == '.docx':
            return _read_docx(file_path, filename)

        # 4. Portable Document Format (.pdf) Ingestion
        if ext == '.pdf':
            return _read_pdf(file_path, filename)

        # 5. Image Vector/Raster Formats
        if ext in _IMAGE_EXTENSIONS:
            return read_image_file(file_path)

        if ext == '.doc':
            log_agent_action("PERCEPTION_WARN", f"Legacy format encountered: {filename}")
            return f"\n[System Error: Legacy format '{ext}' detected. Please convert '{filename}' to '.docx' for automated ingestion.]\n"

        log_agent_action("PERCEPTION_WARN", f"Unknown asset extension skipped: {filename}")
        return f"\n[System Warning: Unsupported file format '{ext}' for file '{filename}'. Skipping content ingestion.]\n"
    except Exception as e:
        # Best-effort boundary: any reader failure is reported in-band so one
        # bad asset does not abort the caller.
        log_agent_action("PERCEPTION_ERROR", f"Failed parsing {filename}: {e}")
        return f"\n[Error processing document asset {filename}: {e}]\n"