Spaces:
Sleeping
Sleeping
| # ./perception_agent.py | |
| import os | |
| import pandas as pd | |
| from docx import Document | |
| from pypdf import PdfReader # Cleanly leverages your requirements.txt package | |
| from groq import Groq | |
| from agent_logging import log_agent_action | |
# Groq-hosted multimodal model that read_image_file sends images to.
# The previously used "llama-3.2-11b-vision-preview" has been decommissioned.
model = "meta-llama/llama-4-scout-17b-16e-instruct"

# Shared Groq API client; the key is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def read_image_file(file_path):
    """Send an image to the Groq vision model and return what it extracts.

    The file is base64-encoded into a data URL and posted together with a
    fixed extraction prompt. The model's reply is returned under a header
    naming the file. If an exception occurs along the way it is logged and a
    bracketed error string is returned instead.
    """
    import base64

    try:
        log_agent_action("PERCEPTION", f"Encoding image for Vision API: {file_path}")

        with open(file_path, "rb") as handle:
            raw_bytes = handle.read()
        b64_payload = base64.b64encode(raw_bytes).decode('utf-8')

        # The extension doubles as the MIME subtype; only "jpg" has to be
        # mapped to its registered name "jpeg".
        suffix = os.path.splitext(file_path)[1].lower().replace(".", "")
        subtype = "jpeg" if suffix == "jpg" else suffix
        data_url = f"data:image/{subtype};base64,{b64_payload}"

        prompt_part = {
            "type": "text",
            "text": "Analyze this technical image. Extract all code, data tables, structural diagrams, or text precisely.",
        }
        image_part = {"type": "image_url", "image_url": {"url": data_url}}

        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": [prompt_part, image_part]}],
            temperature=0.2,
        )

        log_agent_action("PERCEPTION_SUCCESS", f"Vision extraction complete for {file_path}")
        reply = response.choices[0].message.content
        return f"\n--- Visual Content Extraction from {os.path.basename(file_path)} ---\n{reply}\n"
    except Exception as e:
        log_agent_action("PERCEPTION_ERROR", f"Vision interpretation failed: {e}")
        return f"\n[Vision Error processing image {os.path.basename(file_path)}: {e}]\n"
def read_document_file(file_path):
    """Route a file to the matching extractor and return its content as text.

    The (case-insensitive) extension selects the handler:

    * plain text, source and config files are read as UTF-8, silently
      dropping undecodable bytes;
    * ``.xlsx`` / ``.xls`` workbooks are rendered sheet by sheet as tables;
    * ``.docx`` files contribute the text of ``Document.paragraphs``;
    * ``.pdf`` files contribute the extractable text of each page;
    * ``.png`` / ``.jpg`` / ``.jpeg`` / ``.bmp`` are delegated to
      ``read_image_file``.

    Args:
        file_path: Path of the file to ingest.

    Returns:
        A string with the extracted content under a header naming the file,
        or a bracketed warning/error message when the format is unsupported
        or legacy (``.doc``), the PDF yields no text, or parsing fails.
        Exceptions raised while parsing are logged and reported in the
        returned string rather than propagated.
    """
    ext = os.path.splitext(file_path)[1].lower()
    filename = os.path.basename(file_path)

    try:
        # 1. Plain text, markup, source code and config files
        if ext in ('.txt', '.md', '.py', '.json', '.yaml', '.toml', '.css', '.html'):
            log_agent_action("PERCEPTION", f"Reading plaintext structure: {filename}")
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f"\n--- Content of File: {filename} ---\n{f.read()}\n"

        # 2. Excel workbooks
        elif ext in ('.xlsx', '.xls'):
            log_agent_action("PERCEPTION", f"Parsing Data Spreadsheet: {filename}")
            # sheet_name=None loads every sheet as {sheet name: DataFrame}.
            excel_data = pd.read_excel(file_path, sheet_name=None)
            sections = [f"\n--- Spreadsheet Matrix Extraction: {filename} ---\n"]
            for sheet_name, df in excel_data.items():
                try:
                    table = df.to_markdown(index=False)
                except ImportError:
                    # to_markdown depends on the optional 'tabulate' package.
                    # Degrade to a plain-text table instead of losing the
                    # whole workbook to the generic error branch below.
                    log_agent_action(
                        "PERCEPTION_WARN",
                        f"'tabulate' is not installed; using plain-text table for sheet '{sheet_name}' in {filename}",
                    )
                    table = df.to_string(index=False)
                sections.append(f"\nSheet: {sheet_name}\n{table}\n")
            return "".join(sections)

        # 3. Microsoft Word (.docx)
        elif ext == '.docx':
            log_agent_action("PERCEPTION", f"Extracting structural Word paragraphs: {filename}")
            doc = Document(file_path)
            paragraphs = [p.text for p in doc.paragraphs]
            return f"\n--- Document Text Extraction: {filename} ---\n" + "\n".join(paragraphs) + "\n"

        # 4. PDF
        elif ext == '.pdf':
            log_agent_action("PERCEPTION", f"Initializing pypdf reader pipeline: {filename}")
            reader = PdfReader(file_path)
            pdf_text_buffer = []
            for page_number, page in enumerate(reader.pages, start=1):
                extracted_page_text = page.extract_text()
                # Pages that yield no text are skipped.
                if extracted_page_text:
                    pdf_text_buffer.append(f"--- Page {page_number} ---\n{extracted_page_text}")

            if not pdf_text_buffer:
                log_agent_action("PERCEPTION_WARN", f"PDF contained no raw text layers (possible raw scan): {filename}")
                return f"\n[System Warning: '{filename}' appears to be an un-OCRed scanned image PDF. Please extract its pages as raw images for CoderG's Vision layer.]\n"

            log_agent_action("PERCEPTION_SUCCESS", f"Successfully parsed {len(pdf_text_buffer)} pages from {filename}")
            return f"\n--- PDF Document Content Ingestion: {filename} ---\n" + "\n".join(pdf_text_buffer) + "\n"

        # 5. Raster images are handed to the vision model
        elif ext in ('.png', '.jpg', '.jpeg', '.bmp'):
            return read_image_file(file_path)

        # 6. Legacy Word format: not parsed, caller is asked to convert
        elif ext == '.doc':
            log_agent_action("PERCEPTION_WARN", f"Legacy format encountered: {filename}")
            return f"\n[System Error: Legacy format '{ext}' detected. Please convert '{filename}' to '.docx' for automated ingestion.]\n"

        else:
            log_agent_action("PERCEPTION_WARN", f"Unknown asset extension skipped: {filename}")
            return f"\n[System Warning: Unsupported file format '{ext}' for file '{filename}'. Skipping content ingestion.]\n"

    except Exception as e:
        # Best-effort boundary: one unreadable asset is reported in the
        # returned text rather than raised to the caller.
        log_agent_action("PERCEPTION_ERROR", f"Failed parsing {filename}: {str(e)}")
        return f"\n[Error processing document asset {filename}: {str(e)}]\n"