File size: 2,897 Bytes
c4dca42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
Document Reader Tool - MCP-style tool for reading document pages.
"""

from typing import Dict, List, Optional, Union
import json


def get_document_reader_tool_definition():
    """Build the function-calling schema describing ``read_document_pages``.

    Returns:
        A freshly constructed dict with ``"type": "function"`` and a
        ``"function"`` entry holding the tool name, its description, and a
        JSON-schema ``"parameters"`` object. ``document_name`` is the only
        required parameter; ``page_numbers`` is an optional array of
        integers with a minimum of 1.
    """
    document_name_schema = {
        "type": "string",
        "description": "Document identifier (e.g., 'Harmony_R1.8_UserGuide', 'Chorus_R1.1_InstallationGuide')"
    }
    page_numbers_schema = {
        "type": "array",
        "items": {
            "type": "integer",
            "minimum": 1
        },
        "description": "List of page numbers to read. If not specified, returns table of contents."
    }
    parameters_schema = {
        "type": "object",
        "properties": {
            "document_name": document_name_schema,
            "page_numbers": page_numbers_schema
        },
        "required": ["document_name"]
    }
    function_spec = {
        "name": "read_document_pages",
        "description": "Read specific pages from a document or get the table of contents. If no page numbers are specified, returns the table of contents.",
        "parameters": parameters_schema
    }
    return {"type": "function", "function": function_spec}


def execute_document_read(document_reader, document_name: str, page_numbers: Optional[List[int]] = None) -> Dict:
    """
    Read from a document and wrap the outcome in a status dictionary.

    Args:
        document_reader: Object exposing ``read_pages(document_name, page_numbers)``
        document_name: Name of the document
        page_numbers: Page numbers to read; ``None`` is reported as a
            table-of-contents request

    Returns:
        On success, a dict with ``status``, ``document``, ``type``
        (``"table_of_contents"`` or ``"pages"``), ``page_numbers`` (only for
        page reads) and ``content``. If ``read_pages`` raises an
        ``Exception``, a dict with ``status`` ``"error"``, ``message`` and
        ``document`` is returned instead of propagating it.
    """
    try:
        # A single call covers both modes: page_numbers is passed through
        # unchanged, so None reaches the reader exactly as before.
        content = document_reader.read_pages(document_name, page_numbers)
    except Exception as exc:
        return {
            "status": "error",
            "message": f"Error reading document: {exc!s}",
            "document": document_name
        }

    response = {"status": "success", "document": document_name}
    if page_numbers is None:
        response["type"] = "table_of_contents"
    else:
        response["type"] = "pages"
        response["page_numbers"] = page_numbers
    # Content is added last so key order matches the established layout.
    response["content"] = content
    return response


def format_document_content_for_context(result: Dict) -> str:
    """Format a document-read result for inclusion in the context.

    Args:
        result: Result dictionary carrying a ``status`` key. Successful
            results must carry ``content``; error results may carry
            ``document`` and ``message``.

    Returns:
        ``result["content"]`` when ``status`` is ``"success"``; otherwise a
        one-line error description.

    Raises:
        KeyError: If a successful result has no ``content`` key.
    """
    # Use .get on the error path so a partial or malformed error result
    # still produces a readable message instead of raising KeyError.
    if result.get("status") != "success":
        document = result.get("document", "unknown document")
        message = result.get("message", "Unknown error")
        return f"Error reading {document}: {message}"

    return result["content"]