Spaces:
Sleeping
Sleeping
File size: 2,897 Bytes
c4dca42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
"""
Document Reader Tool - MCP-style tool for reading document pages.
"""
from typing import Dict, List, Optional, Union
import json
def get_document_reader_tool_definition():
    """Build the OpenAI function definition for the document reader tool.

    The schema describes a single function, ``read_document_pages``, that
    takes a required ``document_name`` string and an optional
    ``page_numbers`` array of integers (each at least 1).

    Returns:
        A freshly built dictionary on every call, so callers may mutate it
        without affecting later calls.
    """
    # Schema for each argument is spelled out separately, then assembled.
    document_name_schema = {
        "type": "string",
        "description": "Document identifier (e.g., 'Harmony_R1.8_UserGuide', 'Chorus_R1.1_InstallationGuide')",
    }
    page_numbers_schema = {
        "type": "array",
        "items": {"type": "integer", "minimum": 1},
        "description": "List of page numbers to read. If not specified, returns table of contents.",
    }

    # Only the document name is mandatory; page numbers are optional.
    parameters_schema = {
        "type": "object",
        "properties": {
            "document_name": document_name_schema,
            "page_numbers": page_numbers_schema,
        },
        "required": ["document_name"],
    }

    return {
        "type": "function",
        "function": {
            "name": "read_document_pages",
            "description": "Read specific pages from a document or get the table of contents. If no page numbers are specified, returns the table of contents.",
            "parameters": parameters_schema,
        },
    }
def execute_document_read(document_reader, document_name: str, page_numbers: Optional[List[int]] = None) -> Dict:
    """
    Execute a document read operation.

    An omitted or empty ``page_numbers`` is treated as a request for the
    table of contents, matching the tool description ("If no page numbers
    are specified, returns the table of contents").

    Args:
        document_reader: Object exposing ``read_pages(document_name, page_numbers)``
            (documented by the original author as an instance of DocumentReader).
        document_name: Name of the document
        page_numbers: Optional list of page numbers to read. ``None`` or an
            empty list requests the table of contents.

    Returns:
        Dictionary with document content. On success it carries
        ``"status": "success"``, ``"document"``, ``"type"`` (either
        ``"table_of_contents"`` or ``"pages"``), ``"content"`` and, for page
        reads, ``"page_numbers"``. If the reader raises, it carries
        ``"status": "error"``, ``"message"`` and ``"document"`` instead; the
        exception is not propagated.
    """
    # Function-calling clients may send "page_numbers": [] instead of omitting
    # the key; an empty list specifies no pages, so route it to the TOC path
    # rather than reporting an empty "pages" read.
    wants_toc = not page_numbers

    try:
        content = document_reader.read_pages(
            document_name, None if wants_toc else page_numbers
        )
    except Exception as e:
        # Tool boundary: report the failure as data so the caller can relay it.
        return {
            "status": "error",
            "message": f"Error reading document: {e}",
            "document": document_name,
        }

    if wants_toc:
        return {
            "status": "success",
            "document": document_name,
            "type": "table_of_contents",
            "content": content,
        }

    return {
        "status": "success",
        "document": document_name,
        "type": "pages",
        "page_numbers": page_numbers,
        "content": content,
    }
def format_document_content_for_context(result: Dict) -> str:
    """Turn a document-read result into text suitable for the context.

    Args:
        result: Result dictionary with a "status" key. A successful result
            must carry "content"; any other result must carry "document"
            and may carry "message".

    Returns:
        The "content" value unchanged when the status is "success";
        otherwise a one-line error description naming the document.
    """
    succeeded = result["status"] == "success"
    if succeeded:
        return result["content"]

    # Failure path: fall back to a generic message when none was recorded.
    document = result['document']
    message = result.get('message', 'Unknown error')
    return f"Error reading {document}: {message}"