| | """ |
| | Debug script to test document analysis extraction |
| | """ |
| | import asyncio |
| | import sys |
| | from pathlib import Path |
| |
|
| | |
# Put the directory containing this script at the front of the module search
# path. NOTE(review): presumably this lets the `src.*` imports that follow
# resolve regardless of the current working directory — confirm.
_script_dir = Path(__file__).parent
sys.path.insert(0, str(_script_dir))
| |
|
| | from src.llm.langchain_ollama_client import get_langchain_client |
| | from src.agents.scenario1.document_analysis_agent import DocumentAnalysisAgent |
| | from loguru import logger |
| |
|
async def main(patent_path: str = "uploads/patents", max_chars: int = 8000) -> None:
    """Run the document-analysis extraction steps on one patent PDF and log the results.

    Picks the first PDF (in sorted order) found directly inside ``patent_path``,
    extracts its text through ``DocumentAnalysisAgent._extract_patent_text``,
    then invokes the agent's ``structure_chain`` on a prefix of that text and
    logs a summary of the returned structure.

    Args:
        patent_path: Directory searched (non-recursively) for ``*.pdf`` files.
        max_chars: Number of leading characters of the extracted text that are
            passed to the structure chain.

    Returns:
        None. Results and failures are reported through the logger only.
    """
    from collections.abc import Mapping

    # Path.glob yields entries in arbitrary order; sorting makes repeated runs
    # pick the same file.
    patent_files = sorted(Path(patent_path).glob("*.pdf"))
    if not patent_files:
        logger.error(f"No patent PDFs found in {patent_path}")
        return

    test_patent = str(patent_files[0])
    logger.info(f"Testing with patent: {test_patent}")

    llm_client = get_langchain_client(default_complexity='standard')
    agent = DocumentAnalysisAgent(llm_client)

    # Step 1: raw text extraction.
    logger.info("Step 1: Extracting text...")
    patent_text = await agent._extract_patent_text(test_patent)
    if not patent_text:
        # Nothing to analyse; calling the LLM on empty input would only
        # produce a misleading "structure".
        logger.error(f"No text could be extracted from {test_patent}")
        return
    logger.info(f"Extracted text length: {len(patent_text)} characters")
    logger.info(f"First 500 chars: {patent_text[:500]}")

    # Step 2: structured extraction through the agent's chain.
    logger.info("\nStep 2: Extracting structure...")
    from langchain_core.output_parsers import JsonOutputParser
    parser = JsonOutputParser()

    # Broad catch is deliberate: this is the top-level boundary of a debug
    # script and any failure should be reported, not propagated.
    try:
        structure = await agent.structure_chain.ainvoke({
            "patent_text": patent_text[:max_chars],
            "format_instructions": parser.get_format_instructions(),
        })

        if not isinstance(structure, Mapping):
            # A JSON parser may hand back a list, scalar or None; the summary
            # below needs key lookups.
            logger.error(
                f"Structure chain returned {type(structure).__name__}, "
                f"expected a mapping: {structure!r}"
            )
            return

        abstract = structure.get('abstract')
        abstract_preview = abstract[:200] if abstract else 'NOT FOUND'
        # `or []` also covers keys that are present but null in the LLM output,
        # which a `.get(key, [])` default does not.
        independent_claims = structure.get('independent_claims') or []
        dependent_claims = structure.get('dependent_claims') or []

        logger.info("\nExtracted structure:")
        logger.info(f"  Title: {structure.get('title', 'NOT FOUND')}")
        logger.info(f"  Abstract: {abstract_preview}")
        logger.info(f"  Patent ID: {structure.get('patent_id', 'NOT FOUND')}")
        logger.info(f"  Independent claims: {len(independent_claims)}")
        logger.info(f"  Dependent claims: {len(dependent_claims)}")
        logger.info(f"\nFull structure keys: {structure.keys()}")

    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback, so
        # it reaches the same sinks as the rest of the log output.
        logger.exception(f"Structure extraction failed: {e}")
| |
|
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    debug_run = main()
    # asyncio.run creates an event loop, drives the coroutine to completion
    # and closes the loop afterwards.
    asyncio.run(debug_run)
| |
|