# Aethero_github / Aethero_App / src / aeth_ingest.py
# Contributor: xvadur
# Commit 46f737d: Add complete Aethero_App and aethero_protocol directories
"""
AetheroOS Memory Ingestion Agent
================================

This module handles the ingestion of memories into the AetheroOS system, generating
ritualized ministerial reports with metadata, tags, and optional PDF output.

Features:
- Multiple input formats (text, file, JSON)
- Automated tag generation
- Templated report generation
- Multiple output formats (MD, JSON, PDF)
- Blackbox validation integration

Usage:
    python aeth_ingest.py --text "Memory content"
    python aeth_ingest.py --file input.txt
    python aeth_ingest.py --json '{"content": "Memory"}'
"""
import os
import uuid
import argparse
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Union, Any
from jinja2 import Template, TemplateError
# Root logging setup: INFO and above, each record stamped with time,
# logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("aeth_ingest")

# Directory that receives generated reports; created (with any missing
# parents) as soon as the module is imported.
REPORTS_DIR = Path("./aeth_mem_reports/")
REPORTS_DIR.mkdir(exist_ok=True, parents=True)
# Default Jinja2 template for ministerial reports.
# Placeholders used below: ref_code, date, author, tags, source, content,
# and inferred_tags (accessed as .intent_vector, .mental_state, .emotion_tone).
DEFAULT_TEMPLATE = """
### AETHEROOS MINISTERIAL REPORT
**Office of Memory Ingestion**
**Ref. Code**: {{ ref_code }}
---
**Date**: {{ date }}
**Author**: {{ author }}
**Tags**: {{ tags }}
**Source**: {{ source }}
---
#### **🪶 CONTENT**
{{ content }}
---
#### **🪶 INFERRED TAGS**
- Intent Vector: {{ inferred_tags.intent_vector }}
- Mental State: {{ inferred_tags.mental_state }}
- Emotion Tone: {{ inferred_tags.emotion_tone }}
---
**Ministerial Seal**: [ ⚜️ ]
"""
class IngestionError(Exception):
    """Error raised when a memory cannot be parsed, rendered, or saved."""
def parse_input(
    input_path: Optional[str] = None,
    input_json: Optional[Dict] = None,
    input_text: Optional[str] = None
) -> str:
    """
    Parse input from file, JSON payload, or direct text.

    Sources are tried in priority order (file, then JSON, then text); the
    first truthy one is used and the remaining ones are ignored.

    Args:
        input_path: Path to input file (.txt, .md, .json), read as UTF-8
        input_json: JSON payload as dictionary; re-serialized with indent=4
        input_text: Direct text input

    Returns:
        str: Parsed content

    Raises:
        IngestionError: If no valid input is provided, the content is empty
            or whitespace-only, or the input cannot be read/serialized
    """
    try:
        if input_path:
            logger.info(f"Reading input from file: {input_path}")
            with open(input_path, 'r', encoding='utf-8') as f:
                content = f.read()
        elif input_json:
            logger.info("Parsing JSON input")
            content = json.dumps(input_json, indent=4)
        elif input_text:
            logger.info("Using direct text input")
            content = input_text
        else:
            raise IngestionError("No valid input provided")
    except (OSError, ValueError, TypeError) as e:
        # OSError: missing/unreadable file.
        # ValueError: includes UnicodeDecodeError for a non-UTF-8 file and
        #   the circular-reference error from json.dumps.
        # TypeError: payload holds objects json.dumps cannot serialize.
        raise IngestionError(f"Failed to parse input: {str(e)}") from e

    if not content.strip():
        raise IngestionError("Input content is empty")
    return content
def generate_tags(content: str) -> Dict[str, str]:
    """
    Derive ASL tags from keywords found in the content.

    Matching is case-insensitive and substring-based. For each tag the rules
    are evaluated in order and the first rule with a matching keyword decides
    the value; if none match, the neutral default is kept.

    Args:
        content: Text content to analyze

    Returns:
        dict: Tags keyed by intent_vector, mental_state, and emotion_tone
    """
    logger.debug("Generating tags for content")
    haystack = content.lower()

    def first_match(rules, fallback):
        """Return the label of the first rule with a keyword in the text."""
        for keywords, label in rules:
            for keyword in keywords:
                if keyword in haystack:
                    return label
        return fallback

    tags = {
        "intent_vector": first_match(
            (
                (("analyze", "examine", "study"), "analysis"),
                (("create", "generate", "build"), "creation"),
                (("fix", "repair", "solve"), "resolution"),
            ),
            "analysis",
        ),
        "mental_state": first_match(
            (
                (("error", "warning", "issue"), "alert"),
                (("success", "complete", "done"), "satisfied"),
            ),
            "focused",
        ),
        "emotion_tone": first_match(
            (
                (("error", "fail", "issue"), "concerned"),
                (("success", "excellent", "perfect"), "positive"),
            ),
            "neutral",
        ),
    }

    logger.debug(f"Generated tags: {tags}")
    return tags
def render_report(
    content: str,
    metadata: Dict[str, Any],
    template_path: Optional[str] = None
) -> str:
    """
    Render content and metadata into a ritualized report using Jinja2 templates.

    Args:
        content: Report content
        metadata: Report metadata; must contain ref_code, date, author, tags,
            source, and inferred_tags. ``tags`` may be an iterable of values,
            a single string, or None; it is rendered as a comma-separated
            string.
        template_path: Optional path to custom template file (read as UTF-8);
            the built-in default template is used when omitted

    Returns:
        str: Rendered report content

    Raises:
        IngestionError: If required metadata is missing, tags cannot be
            iterated, the template cannot be read or rendered, or the
            rendered output is empty
    """
    # Validate required metadata fields
    required_fields = ["ref_code", "date", "author", "tags", "source", "inferred_tags"]
    missing_fields = [field for field in required_fields if field not in metadata]
    if missing_fields:
        raise IngestionError(f"Missing required metadata fields: {', '.join(missing_fields)}")

    # Normalize tags to a display string. A bare string is one tag (joining
    # it directly would split it into characters), None means no tags, and
    # items are stringified so non-str tags do not break the join.
    raw_tags = metadata["tags"]
    if raw_tags is None:
        tags_str = ""
    elif isinstance(raw_tags, str):
        tags_str = raw_tags
    else:
        try:
            tags_str = ", ".join(str(tag) for tag in raw_tags)
        except TypeError as e:
            raise IngestionError(f"Invalid tags metadata: {str(e)}") from e

    try:
        if template_path:
            logger.info(f"Using custom template: {template_path}")
            with open(template_path, 'r', encoding='utf-8') as f:
                template = Template(f.read())
        else:
            logger.info("Using default template")
            template = Template(DEFAULT_TEMPLATE)

        rendered = template.render(
            content=content,
            ref_code=metadata["ref_code"],
            date=metadata["date"],
            author=metadata["author"],
            tags=tags_str,
            source=metadata["source"],
            inferred_tags=metadata["inferred_tags"]
        )
    except (OSError, UnicodeDecodeError, TemplateError) as e:
        # UnicodeDecodeError covers a template file that is not valid UTF-8.
        raise IngestionError(f"Failed to render report: {str(e)}") from e

    if not rendered.strip():
        raise IngestionError("Template rendered empty content")
    return rendered
def save_report(
    content: str,
    metadata: Dict[str, Any],
    as_pdf: bool = False
) -> Dict[str, Optional[str]]:
    """
    Save the report in multiple formats (MD, JSON, optionally PDF).

    Files are written to REPORTS_DIR and named after ``metadata["ref_code"]``.
    PDF generation is best-effort: if pdfkit is missing or conversion fails,
    the problem is logged and the ``pdf`` entry stays None.

    Args:
        content: Report content
        metadata: Report metadata; must contain ``ref_code`` and be
            JSON-serializable
        as_pdf: Whether to generate PDF output

    Returns:
        dict: Paths to saved files under the keys ``markdown``, ``json`` and
            ``pdf`` (None for anything not written)

    Raises:
        IngestionError: If saving fails
    """
    try:
        ref_code = metadata["ref_code"]
        # Serialize before touching the disk so that unserializable metadata
        # fails without leaving an orphan .md or a truncated .json behind.
        metadata_json = json.dumps(metadata, indent=4)

        # The directory is created at import time but may have been removed
        # since; recreating it is cheap and idempotent.
        REPORTS_DIR.mkdir(parents=True, exist_ok=True)

        # NOTE(review): ref_code is used as a path component without
        # validation; a value containing path separators or ".." resolves
        # outside REPORTS_DIR.
        file_base = REPORTS_DIR / ref_code
        saved_files = {"markdown": None, "json": None, "pdf": None}

        # Save Markdown
        md_path = f"{file_base}.md"
        logger.info(f"Saving markdown to: {md_path}")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(content)
        saved_files["markdown"] = md_path

        # Save JSON metadata
        json_path = f"{file_base}.json"
        logger.info(f"Saving metadata to: {json_path}")
        with open(json_path, "w", encoding="utf-8") as f:
            f.write(metadata_json)
        saved_files["json"] = json_path

        # Save PDF if requested
        if as_pdf:
            try:
                # Imported lazily: pdfkit is an optional dependency.
                import pdfkit
                pdf_path = f"{file_base}.pdf"
                logger.info(f"Generating PDF: {pdf_path}")
                pdfkit.from_string(content, pdf_path)
                saved_files["pdf"] = pdf_path
            except ImportError:
                logger.warning("pdfkit not installed - skipping PDF generation")
            except Exception as e:
                logger.error(f"PDF generation failed: {str(e)}")

        return saved_files
    except Exception as e:
        raise IngestionError(f"Failed to save report: {str(e)}") from e
def trigger_blackbox(report_path: str) -> None:
    """
    Announce a Blackbox validation run for a report.

    Placeholder: the request is only logged; no validator is invoked yet.

    Args:
        report_path: Path to the report file to validate
    """
    announcement = f"Triggering Blackbox validation for: {report_path}"
    logger.info(announcement)
    # TODO: Implement actual Blackbox integration
    # Example: subprocess.run(["blackbox", "--analyze", report_path])
def main() -> None:
    """
    Main entry point for the AetheroOS Memory Ingestion Agent.

    Parses command-line arguments, ingests the supplied content, renders and
    saves the report, and optionally triggers Blackbox validation.

    Raises:
        SystemExit: With status 1 if ingestion fails or an unexpected error
            occurs (argparse itself exits on invalid arguments)
    """
    parser = argparse.ArgumentParser(
        description="AetheroOS Memory Ingestion Agent",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--text", type=str, help="Input text to ingest")
    parser.add_argument("--file", type=str, help="Input file path (.txt, .md, .json)")
    parser.add_argument("--json", type=json.loads, help="Input JSON payload")
    parser.add_argument("--ref_code", type=str, help="Custom reference code")
    parser.add_argument("--author", type=str, default="AetheroGPT",
                        help="Author of the report")
    parser.add_argument("--tags", type=str, nargs="*", default=[],
                        help="Custom tags for the report")
    parser.add_argument("--source", type=str, default="unknown",
                        help="Source of the content")
    parser.add_argument("--template", type=str,
                        help="Custom Jinja2 template path")
    parser.add_argument("--validate", action="store_true",
                        help="Trigger Blackbox validation")
    parser.add_argument("--pdf", action="store_true",
                        help="Generate PDF output")
    parser.add_argument("--debug", action="store_true",
                        help="Enable debug logging")
    args = parser.parse_args()

    # Configure debug logging if requested
    if args.debug:
        logger.setLevel(logging.DEBUG)

    try:
        # Parse input
        content = parse_input(
            input_path=args.file,
            input_json=args.json,
            input_text=args.text
        )

        # Generate metadata. The clock is sampled once so the year embedded
        # in the reference code always agrees with the report date.
        now = datetime.now()
        ref_code = args.ref_code or f"AETH-MEM-{now.strftime('%Y')}-{str(uuid.uuid4().int)[:4]}"
        metadata = {
            "ref_code": ref_code,
            "date": now.strftime("%Y-%m-%d"),
            "author": args.author,
            "tags": args.tags,
            "source": args.source,
            "inferred_tags": generate_tags(content)
        }

        # Render report
        rendered_content = render_report(
            content,
            metadata,
            template_path=args.template
        )

        # Save report
        saved_files = save_report(rendered_content, metadata, as_pdf=args.pdf)
        logger.info(f"Report saved: {saved_files}")

        # Trigger Blackbox if validation is requested
        if args.validate:
            trigger_blackbox(saved_files["markdown"])
    except IngestionError as e:
        logger.error(f"Ingestion failed: {str(e)}")
        # SystemExit is raised directly: the exit() builtin is injected by the
        # site module and is not available in every runtime (e.g. python -S).
        raise SystemExit(1)
    except Exception as e:
        # logger.exception keeps the traceback so unexpected failures can be
        # diagnosed from the log.
        logger.exception(f"Unexpected error: {str(e)}")
        raise SystemExit(1)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()