CyberLegalAIendpoint / tests /test_pdf_analyzer.py
Charles Grandjean
solve tests
8cc8e89
#!/usr/bin/env python3
"""
Test script for PDF Analyzer Agent
"""
import asyncio
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from mistralai import Mistral
from agents.pdf_analyzer import PDFAnalyzerAgent
load_dotenv()
def _print_report(result):
    """Print the analysis sections found in *result*, then a status footer.

    Only ``.get`` lookups are performed on *result*; a section is printed
    when its value is truthy and skipped otherwise.
    """
    section_rule = "─" * 80
    banner_rule = "=" * 80

    # (result key, heading) pairs, in display order.
    sections = (
        ("actors", "👥 ACTORS INVOLVED"),
        ("key_details", "🔑 KEY DETAILS"),
        ("summary", "📄 HIGH-LEVEL SUMMARY"),
    )
    for key, heading in sections:
        content = result.get(key)
        if not content:
            continue
        print(f"\n{section_rule}")
        print(heading)
        print(section_rule)
        print(content)

    print(f"\n{banner_rule}")
    print(f"✅ ANALYSIS COMPLETE - Status: {result.get('processing_status')}")
    if result.get("ocr_used"):
        print(f"📷 OCR was used: {result.get('ocr_method')}")
    print(f"{banner_rule}\n")


async def main(pdf_path: str = "PublicWaterMassMailing.pdf") -> None:
    """Run the PDF analyzer agent on one file and print its report.

    Configuration is read from the environment: ``OPENAI_API_KEY`` is
    required; ``LLM_MODEL`` and ``LLM_BINDING_HOST`` override the model name
    and base URL; ``MISTRAL_API_KEY`` is optional and, when set, a Mistral
    client is passed to the agent.

    Args:
        pdf_path: Path of the PDF to analyze. Defaults to the sample
            document this script was originally hard-wired to.

    Returns:
        None. Outcomes are reported on stdout. A missing API key or missing
        file ends the run early with a message; an exception raised during
        analysis or reporting is printed with its traceback instead of
        propagating.
    """
    # Read the key once and reuse it for both the check and the client.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("❌ No API key found. Please set OPENAI_API_KEY in .env file")
        return

    # Initialize LLM (Cerebras via OpenAI-compatible endpoint)
    llm = ChatOpenAI(
        model=os.getenv("LLM_MODEL", "gpt-5-nano-2025-08-07"),
        reasoning_effort="low",
        api_key=api_key,
        base_url=os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"),
        default_headers={"X-Cerebras-3rd-Party-Integration": "langgraph"},
    )

    # Initialize Mistral client for OCR (optional)
    mistral_key = os.getenv("MISTRAL_API_KEY")
    mistral_client = Mistral(api_key=mistral_key) if mistral_key else None

    agent = PDFAnalyzerAgent(llm, mistral_client=mistral_client)

    if not os.path.exists(pdf_path):
        print(f"\n❌ PDF file not found: {pdf_path}")
        return

    banner_rule = "=" * 80
    print(f"\n{banner_rule}")
    print(f"PDF ANALYSIS FOR: {pdf_path}")
    print(f"{banner_rule}\n")

    try:
        result = await agent.analyze_pdf(pdf_path)
        # Reporting stays inside the try so a malformed result is reported
        # the same way as an analysis failure rather than crashing the run.
        _print_report(result)
    except Exception as e:
        # Top-level boundary of a diagnostic script: show everything.
        print(f"\n❌ Error during PDF analysis: {e}")
        import traceback
        traceback.print_exc()
# Script entry point: drive the async analysis only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    entry_coroutine = main()
    asyncio.run(entry_coroutine)