Spaces:

prashantmatlani
/

coderg

Sleeping

App Files Files Community

coderg / perception_agent.py

prashantmatlani

updated file_agent execute condition

5744898 about 2 months ago

Raw

History Blame Contribute Delete

5.51 kB


	# ./perception_agent.py

	import os
	import pandas as pd
	from docx import Document
	from pypdf import PdfReader # Cleanly leverages your requirements.txt package
	from groq import Groq
	from agent_logging import log_agent_action

	# Groq model id passed as `model=` to the chat completion call in read_image_file.
	# The previously used vision model below was decommissioned, hence the replacement.
	#model = "llama-3.2-11b-vision-preview" # -> Decommissioned
	model = "meta-llama/llama-4-scout-17b-16e-instruct"
	# Module-level Groq client, created once at import time. The API key is read from the
	# GROQ_API_KEY environment variable; os.getenv returns None when the variable is unset.
	# NOTE(review): how Groq() behaves with api_key=None is not visible here — confirm.
	client = Groq(api_key=os.getenv("GROQ_API_KEY"))

	def read_image_file(file_path):
	    """Interpret an image (.png, .jpg, .bmp) through the Groq vision chat endpoint.

	    The file is read in binary mode, base64-encoded, and sent inline as a
	    ``data:`` URL next to a fixed extraction prompt.

	    Args:
	        file_path: Path of the image file to interpret.

	    Returns:
	        A string. On success, the model's reply placed under a header that
	        names the file. If any step inside the ``try`` raises, a bracketed
	        "[Vision Error ...]" message carrying the exception text instead.
	    """
	    import base64

	    try:
	        log_agent_action("PERCEPTION", f"Encoding image for Vision API: {file_path}")
	        with open(file_path, "rb") as handle:
	            raw_bytes = handle.read()
	        image_b64 = base64.b64encode(raw_bytes).decode('utf-8')

	        # The dot-less, lower-cased extension becomes the MIME subtype;
	        # "jpg" is the one spelling that has to be mapped to "jpeg".
	        subtype = os.path.splitext(file_path)[1].lower().replace(".", "")
	        if subtype == "jpg":
	            mime_type = "image/jpeg"
	        else:
	            mime_type = f"image/{subtype}"

	        # A single user message made of two parts: the instruction and the image.
	        instruction_part = {
	            "type": "text",
	            "text": "Analyze this technical image. Extract all code, data tables, structural diagrams, or text precisely.",
	        }
	        picture_part = {
	            "type": "image_url",
	            "image_url": {"url": f"data:{mime_type};base64,{image_b64}"},
	        }
	        user_message = {"role": "user", "content": [instruction_part, picture_part]}

	        response = client.chat.completions.create(
	            model=model,
	            messages=[user_message],
	            temperature=0.2,
	        )
	        log_agent_action("PERCEPTION_SUCCESS", f"Vision extraction complete for {file_path}")

	        answer = response.choices[0].message.content
	        return f"\n--- Visual Content Extraction from {os.path.basename(file_path)} ---\n{answer}\n"
	    except Exception as e:
	        # Best-effort boundary: the failure is logged and reported as text, not raised.
	        reason = str(e)
	        log_agent_action("PERCEPTION_ERROR", f"Vision interpretation failed: {reason}")
	        return f"\n[Vision Error processing image {os.path.basename(file_path)}: {reason}]\n"

	def read_document_file(file_path):
	    """Route a file to the matching reader based on its lower-cased extension.

	    Handled groups: plain text/code files, Excel workbooks, .docx documents,
	    PDFs, raster images (delegated to ``read_image_file``), legacy .doc
	    files (rejected with a conversion hint), and everything else (skipped
	    with a warning).

	    Args:
	        file_path: Path of the file to ingest.

	    Returns:
	        A string in every case: the extracted content under a header naming
	        the file, or a bracketed warning/error message. Exceptions raised
	        while reading are caught, logged, and reported in that string.
	    """
	    ext = os.path.splitext(file_path)[1].lower()
	    filename = os.path.basename(file_path)

	    try:
	        # 1. Plain text, markup, and source files: returned verbatim.
	        if ext in ('.txt', '.md', '.py', '.json', '.yaml', '.toml', '.css', '.html'):
	            log_agent_action("PERCEPTION", f"Reading plaintext structure: {filename}")
	            # Undecodable bytes are dropped (errors="ignore") rather than raising.
	            with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
	                body = handle.read()
	            return f"\n--- Content of File: {filename} ---\n{body}\n"

	        # 2. Excel workbooks: every sheet rendered as a markdown table.
	        if ext in ('.xlsx', '.xls'):
	            log_agent_action("PERCEPTION", f"Parsing Data Spreadsheet: {filename}")
	            # sheet_name=None loads all sheets as a {name: DataFrame} mapping.
	            workbook = pd.read_excel(file_path, sheet_name=None)
	            pieces = [f"\n--- Spreadsheet Matrix Extraction: {filename} ---\n"]
	            for sheet_name, frame in workbook.items():
	                pieces.append(f"\nSheet: {sheet_name}\n")
	                pieces.append(frame.to_markdown(index=False) + "\n")
	            return "".join(pieces)

	        # 3. Word (.docx): paragraph texts joined by newlines.
	        if ext == '.docx':
	            log_agent_action("PERCEPTION", f"Extracting structural Word paragraphs: {filename}")
	            word_doc = Document(file_path)
	            body = "\n".join(para.text for para in word_doc.paragraphs)
	            return f"\n--- Document Text Extraction: {filename} ---\n" + body + "\n"

	        # 4. PDF: text of each page, labelled with a 1-based page number.
	        if ext == '.pdf':
	            log_agent_action("PERCEPTION", f"Initializing pypdf reader pipeline: {filename}")
	            pdf = PdfReader(file_path)
	            page_chunks = []
	            for page_no, page in enumerate(pdf.pages, start=1):
	                page_text = page.extract_text()
	                if not page_text:
	                    # Pages yielding no text are left out entirely.
	                    continue
	                page_chunks.append(f"--- Page {page_no} ---\n{page_text}")

	            if len(page_chunks) == 0:
	                log_agent_action("PERCEPTION_WARN", f"PDF contained no raw text layers (possible raw scan): {filename}")
	                return f"\n[System Warning: '{filename}' appears to be an un-OCRed scanned image PDF. Please extract its pages as raw images for CoderG's Vision layer.]\n"

	            log_agent_action("PERCEPTION_SUCCESS", f"Successfully parsed {len(page_chunks)} pages from {filename}")
	            return f"\n--- PDF Document Content Ingestion: {filename} ---\n" + "\n".join(page_chunks) + "\n"

	        # 5. Raster images: handed to the vision reader.
	        if ext in ('.png', '.jpg', '.jpeg', '.bmp'):
	            return read_image_file(file_path)

	        # 6. Legacy Word (.doc): not parsed; the caller is asked to convert it.
	        if ext == '.doc':
	            log_agent_action("PERCEPTION_WARN", f"Legacy format encountered: {filename}")
	            return f"\n[System Error: Legacy format '{ext}' detected. Please convert '{filename}' to '.docx' for automated ingestion.]\n"

	        # Anything else is skipped with a warning.
	        log_agent_action("PERCEPTION_WARN", f"Unknown asset extension skipped: {filename}")
	        return f"\n[System Warning: Unsupported file format '{ext}' for file '{filename}'. Skipping content ingestion.]\n"

	    except Exception as e:
	        # Best-effort boundary: parsing failures become a textual error result.
	        reason = str(e)
	        log_agent_action("PERCEPTION_ERROR", f"Failed parsing {filename}: {reason}")
	        return f"\n[Error processing document asset {filename}: {reason}]\n"