u-ashish committed on
Commit
7f20e45
·
1 Parent(s): 08c299e

Add local_entrypoint and update README

Browse files
examples/README_MODAL.md CHANGED
@@ -25,13 +25,43 @@ Notes:
25
 
26
  Which will create a [`Modal Volume`](https://modal.com/docs/reference/modal.Volume) to store them for re-use.
27
 
28
- - Regardless, once the deploy is finished, you can submit a request. To do so, get the base URL for your endpoint:
29
- - Go into Modal
30
- - Find the app (default name: `datalab-marker-modal-demo`)
31
- - Click on `MarkerModalDemoService`
32
- - You should see the URL there
 
 
33
 
34
- - Make a request to `{BASE_URL}/convert` like this (you can also use Insomnia, etc. to help):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  ```
36
  curl --request POST \
37
  --url {BASE_URL}/convert \
@@ -54,4 +84,5 @@ You should get a response like this
54
  "metadata": {... page level metadata ...},
55
  "page_count": 2
56
  }
57
- ```
 
 
25
 
26
  Which will create a [`Modal Volume`](https://modal.com/docs/reference/modal.Volume) to store them for re-use.
27
 
28
+ Once the deploy is finished, you can:
29
+ - Test a file upload locally through your CLI using an `invoke_conversion` command we expose through Modal's [`local_entrypoint`](https://modal.com/docs/reference/modal.App#local_entrypoint)
30
+ - Get the URL of your endpoint and make a request through a client of your choice.
31
+
32
+ **Test from your CLI with `invoke_conversion`**
33
+
34
+ If your endpoint is live, simply run this command:
35
 
36
+ ```
37
+ $ modal run marker_modal_deployment.py::invoke_conversion --pdf-file <PDF_FILE_PATH> --output-format markdown
38
+ ```
39
+
40
+ It'll automatically detect the URL of your new endpoint using [`.get_web_url()`](https://modal.com/docs/guide/webhook-urls#determine-the-url-of-a-web-endpoint-from-code), check the service's health, submit your file, and save the full API response to `<PDF_NAME>_response.json` in your current working directory.
41
+
42
+ **Making a request using your own client**
43
+
44
+ If you want to make requests elsewhere, e.g. with cURL or a client like Insomnia, you'll need to get the URL.
45
+
46
+ When your `modal deploy` command from earlier finishes, it'll include your endpoint URL at the end. For example:
47
+
48
+ ```
49
+ $ modal deploy marker_modal_deployment.py
50
+ ...
51
+ ✓ Created objects.
52
+ ├── 🔨 Created mount /marker/examples/marker_modal_deployment.py
53
+ ├── 🔨 Created function download_models.
54
+ ├── 🔨 Created function MarkerModalDemoService.*.
55
+ └── 🔨 Created web endpoint for MarkerModalDemoService.marker_api => <YOUR_ENDPOINT_URL>
56
+ ✓ App deployed in 149.877s! 🎉
57
+ ```
58
+
59
+ If you accidentally close your terminal session, you can also always go into Modal's dashboard and:
60
+ - Find the app (default name: `datalab-marker-modal-demo`)
61
+ - Click on `MarkerModalDemoService`
62
+ - Find your endpoint URL
63
+
64
+ Once you have your URL, make a request to `{YOUR_ENDPOINT_URL}/convert` like this, substituting your endpoint URL for `{BASE_URL}` (you can also use Insomnia, etc.):
65
  ```
66
  curl --request POST \
67
  --url {BASE_URL}/convert \
 
84
  "metadata": {... page level metadata ...},
85
  "page_count": 2
86
  }
87
+ ```
88
+
examples/marker_modal_deployment.py CHANGED
@@ -4,6 +4,7 @@ Modal deployment for Datalab Marker PDF conversion service.
4
 
5
  import modal
6
  import os
 
7
 
8
  # Define the Modal app
9
  app = modal.App("datalab-marker-modal-demo")
@@ -119,7 +120,7 @@ class MarkerModalDemoService:
119
  self.models = None
120
 
121
  @modal.asgi_app()
122
- def fastapi_app(self):
123
  import traceback
124
  import io
125
  import base64
@@ -296,3 +297,101 @@ class MarkerModalDemoService:
296
  )
297
 
298
  return web_app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  import modal
6
  import os
7
+ from typing import Optional
8
 
9
  # Define the Modal app
10
  app = modal.App("datalab-marker-modal-demo")
 
120
  self.models = None
121
 
122
  @modal.asgi_app()
123
+ def marker_api(self):
124
  import traceback
125
  import io
126
  import base64
 
297
  )
298
 
299
  return web_app
300
+
301
+
302
+ #
303
+ # This does not get deployed. It's a useful entrypoint from your local CLI
304
+ # that you can use to test your deployment. It'll store the
305
+ # API response in a new file on your machine.
306
+ #
307
+ @app.local_entrypoint()
308
+ async def invoke_conversion(
309
+ pdf_file: Optional[str] = None,
310
+ output_format: str = "markdown",
311
+ env: str = 'main'
312
+ ):
313
+ """
314
+ Local entrypoint to test your deployed Marker endpoint in Modal.
315
+
316
+ Usage:
317
+ modal run marker_modal_deployment.py::invoke_conversion --pdf-file /path/to/file.pdf --output-format markdown
318
+ """
319
+ import requests
320
+ import json
321
+ from pathlib import Path
322
+
323
+ if not pdf_file:
324
+ print("No PDF file specified. Use --pdf-file /path/to/your.pdf")
325
+ return
326
+
327
+ pdf_path = Path(pdf_file)
328
+ if not pdf_path.exists():
329
+ print(f"File not found: {pdf_file}")
330
+ return
331
+
332
+ #
333
+ # Get the web URL for our deployed service
334
+ #
335
+ try:
336
+ service = modal.Cls.from_name(
337
+ "datalab-marker-modal-demo",
338
+ "MarkerModalDemoService",
339
+ environment_name=env
340
+ )
341
+ web_url = service().marker_api.get_web_url()
342
+ print(f"Found deployed service at: {web_url}")
343
+ except Exception as e:
344
+ print(f"Error getting web URL: {e}")
345
+ print("Make sure you've deployed the service first with: modal deploy marker_modal_deployment.py")
346
+ return
347
+
348
+ print(f"Testing conversion of: {pdf_path.name}")
349
+ print(f"Output format: {output_format}")
350
+
351
+ #
352
+ # Test health endpoint first
353
+ #
354
+ try:
355
+ health_response = requests.get(f"{web_url}/health")
356
+ health_data = health_response.json()
357
+ print(f"Service health: {health_data['status']}")
358
+ print(f"Models loaded: {health_data['models_loaded']} ({health_data['model_count']} models)")
359
+
360
+ if not health_data['models_loaded']:
361
+ print("Warning: Models not loaded yet. First request may be slow.")
362
+
363
+ except Exception as e:
364
+ print(f"Health check failed: {e}")
365
+
366
+ #
367
+ # Make conversion request
368
+ #
369
+ try:
370
+ with open(pdf_path, 'rb') as f:
371
+ files = {'file': (pdf_path.name, f, 'application/pdf')}
372
+ data = {'output_format': output_format}
373
+
374
+ print(f"Sending request to {web_url}/convert...")
375
+ response = requests.post(f"{web_url}/convert", files=files, data=data)
376
+
377
+ if response.status_code == 200:
378
+ result = response.json()
379
+ print(f"✅ Conversion successful!")
380
+ print(f"Filename: {result['filename']}")
381
+ print(f"Format: {result['output_format']}")
382
+ print(f"Pages: {result['page_count']}")
383
+
384
+ output_file = f"{pdf_path.stem}_response.json"
385
+ with open(output_file, 'w', encoding='utf-8') as f:
386
+ json.dump(result, f, indent=2, ensure_ascii=False)
387
+ print(f"Full API response saved to: {output_file}")
388
+
389
+ if result['images']:
390
+ print(f"Images extracted: {len(result['images'])}")
391
+
392
+ else:
393
+ print(f"❌ Conversion failed: {response.status_code}")
394
+ print(f"Error: {response.text}")
395
+
396
+ except Exception as e:
397
+ print(f"Request failed: {e}")