cryogenic22 committed on
Commit
6e4c2b6
·
verified ·
1 Parent(s): 9a39da2

Update sdk/interfaces/document.py

Browse files
Files changed (1) hide show
  1. sdk/interfaces/document.py +104 -1
sdk/interfaces/document.py CHANGED
@@ -71,4 +71,107 @@ class DocumentClient:
71
 
72
  # Prepare files
73
  for file in files:
74
- if isinstance(file, (str, Path)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  # Prepare files
73
  for file in files:
74
+ if isinstance(file, (str, Path)):
75
+ file_path = Path(file)
76
+ async with aiofiles.open(file_path, 'rb') as f:
77
+ file_content = await f.read()
78
+ filename = file_path.name
79
+ else:
80
+ file_content = file.read()
81
+ filename = getattr(file, 'name', f'document_{len(upload_files)}')
82
+
83
+ # Create temporary file
84
+ temp_file = aiofiles.tempfile.SpooledTemporaryFile()
85
+ temp_file.write(file_content)
86
+ temp_file.seek(0)
87
+ temp_files.append(temp_file)
88
+
89
+ upload_files.append(
90
+ ('files', (filename, temp_file, 'application/octet-stream'))
91
+ )
92
+
93
+ # Make request
94
+ response = await self.client.post(
95
+ "/api/v1/document/batch",
96
+ files=upload_files
97
+ )
98
+
99
+ # Process response
100
+ return {
101
+ filename: ProcessedDocument(**doc['document'])
102
+ for filename, doc in response.items()
103
+ }
104
+
105
+ except Exception as e:
106
+ self.logger.error(f"Batch processing failed: {str(e)}")
107
+ raise
108
+ finally:
109
+ # Clean up temporary files
110
+ for temp_file in temp_files:
111
+ temp_file.close()
112
+
113
async def get_supported_types(self) -> Dict[str, List[str]]:
    """Fetch the document types the processing service reports as supported.

    Sends a GET request to ``/api/v1/document/supported-types`` through
    ``self.client`` and returns the ``supported_types`` entry of the response.

    Raises:
        Exception: Any failure while requesting or reading the response is
            logged through ``self.logger`` and then re-raised unchanged.
    """
    endpoint = "/api/v1/document/supported-types"
    try:
        payload = await self.client.get(endpoint)
        supported = payload['supported_types']
    except Exception as exc:
        self.logger.error(f"Failed to get supported types: {str(exc)}")
        raise
    return supported
121
+
122
async def validate_config(self, config: Dict[str, Any]) -> bool:
    """Ask the service whether a document processing configuration is valid.

    The configuration is JSON-encoded and sent as the ``config`` query
    parameter of a GET request to ``/api/v1/document/config/validate``.
    The ``valid`` entry of the response is returned as-is.

    Raises:
        Exception: Any failure (including JSON serialization of ``config``)
            is logged through ``self.logger`` and then re-raised unchanged.
    """
    try:
        # Serialization stays inside the try so encoding errors are logged too.
        query = {"config": json.dumps(config)}
        verdict = await self.client.get(
            "/api/v1/document/config/validate",
            params=query,
        )
        return verdict['valid']
    except Exception as exc:
        self.logger.error(f"Config validation failed: {str(exc)}")
        raise
133
+
134
async def health_check(self) -> Dict[str, Any]:
    """Query the document processor health endpoint.

    Returns whatever ``self.client.get("/api/v1/document/health")`` yields,
    without further processing.

    Raises:
        Exception: Any failure is logged through ``self.logger`` and then
            re-raised unchanged.
    """
    try:
        status = await self.client.get("/api/v1/document/health")
    except Exception as exc:
        self.logger.error(f"Health check failed: {str(exc)}")
        raise
    return status
141
+
142
+ # Usage example:
143
async def example_usage():
    """Demonstrate the document client: single file, batch, supported types.

    NOTE(review): ``LatticeClient`` is not defined in the visible part of this
    file; it is assumed to be importable where this example runs -- confirm.
    The API key below is a placeholder.
    """
    sdk = LatticeClient(api_key="your-api-key")

    # Options forwarded to the single-document call.
    processing_options = dict(
        extract_text=True,
        extract_metadata=True,
        chunk_size=500,
        chunk_overlap=50,
    )

    # One document.
    single = await sdk.document.process_document(
        "example.pdf",
        config=processing_options,
    )
    print(f"Processed document: {single.doc_id}")
    print(f"Number of chunks: {len(single.chunks)}")

    # Several documents in one request; results are keyed by filename.
    paths = ["doc1.pdf", "doc2.docx", "doc3.txt"]
    per_file = await sdk.document.batch_process(paths)
    for name, processed in per_file.items():
        print(f"{name}: {len(processed.chunks)} chunks")

    # Supported document types.
    type_map = await sdk.document.get_supported_types()
    print(f"Supported types: {type_map}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(example_usage())