Spaces:

rt4u
/

marker

Sleeping

App Files Community

Vik Paruchuri committed on Nov 27, 2024

Commit

77842ad

2 Parent(s): 002d7b0 fcfa501

Merge branch 'master' into dev

Browse files

Files changed (3) hide show

.gitignore +1 -0
marker_server.py +19 -7
signatures/version1/cla.json +40 -0

.gitignore CHANGED Viewed

@@ -12,6 +12,7 @@ debug_data
 temp.md
 temp
 conversion_results
 # Byte-compiled / optimized / DLL files
 __pycache__/

 temp.md
 temp
 conversion_results
+uploads
 # Byte-compiled / optimized / DLL files
 __pycache__/

marker_server.py CHANGED Viewed

@@ -18,6 +18,14 @@ from marker.models import create_model_dict
 app_data = {}
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     app_data["models"] = create_model_dict()
@@ -30,10 +38,11 @@ async def lifespan(app: FastAPI):
 app = FastAPI(lifespan=lifespan)
 @app.get("/")
 async def root():
     return HTMLResponse(
-"""
 <h1>Marker API</h1>
 <ul>
     <li><a href="/docs">API Documentation</a></li>
@@ -45,8 +54,7 @@ async def root():
 class CommonParams(BaseModel):
     filepath: Annotated[
-        str,
-        Field(description="The path to the PDF file to convert.")
     ]
     page_range: Annotated[
         Optional[str],
@@ -58,11 +66,15 @@ class CommonParams(BaseModel):
     ] = None
     force_ocr: Annotated[
         bool,
-        Field(description="Force OCR on all pages of the PDF.  Defaults to False.  This can lead to worse results if you have good text in your PDFs (which is true in most cases).")
     ] = False
     paginate_output: Annotated[
         bool,
-        Field(description="Whether to paginate the output.  Defaults to False.  If set to True, each page of the output will be separated by a horizontal rule that contains the page number (2 newlines, {PAGE_NUMBER}, 48 - characters, 2 newlines).")
     ] = False
     output_format: Annotated[
         str,
@@ -106,7 +118,7 @@ async def convert_pdf(
         "output": text,
         "images": encoded,
         "metadata": metadata,
-        "success": True
     }
@@ -123,4 +135,4 @@ def main(port: int, host: str):
 if __name__ == "__main__":
-    main()

 app_data = {}
+UPLOAD_DIRECTORY = "./uploads"  # Directory to store uploaded files
+# Ensure the upload directory exists
+if not os.path.exists(UPLOAD_DIRECTORY):
+    os.makedirs(UPLOAD_DIRECTORY)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     app_data["models"] = create_model_dict()
 app = FastAPI(lifespan=lifespan)
 @app.get("/")
 async def root():
     return HTMLResponse(
+        """
 <h1>Marker API</h1>
 <ul>
     <li><a href="/docs">API Documentation</a></li>
 class CommonParams(BaseModel):
     filepath: Annotated[
+        Optional[str], Field(description="The path to the PDF file to convert.")
     ]
     page_range: Annotated[
         Optional[str],
     ] = None
     force_ocr: Annotated[
         bool,
+        Field(
+            description="Force OCR on all pages of the PDF.  Defaults to False.  This can lead to worse results if you have good text in your PDFs (which is true in most cases)."
+        ),
     ] = False
     paginate_output: Annotated[
         bool,
+        Field(
+            description="Whether to paginate the output.  Defaults to False.  If set to True, each page of the output will be separated by a horizontal rule that contains the page number (2 newlines, {PAGE_NUMBER}, 48 - characters, 2 newlines)."
+        ),
     ] = False
     output_format: Annotated[
         str,
         "output": text,
         "images": encoded,
         "metadata": metadata,
+        "success": True,
     }
 if __name__ == "__main__":
+    main()

signatures/version1/cla.json CHANGED Viewed

@@ -47,6 +47,46 @@
       "created_at": "2024-08-18T07:44:04Z",
       "repoId": 712111618,
       "pullRequestNo": 257
     }
   ]
 }

       "created_at": "2024-08-18T07:44:04Z",
       "repoId": 712111618,
       "pullRequestNo": 257
+    },
+    {
+      "name": "conscienceli",
+      "id": 4034943,
+      "comment_id": 2333374932,
+      "created_at": "2024-09-06T06:56:25Z",
+      "repoId": 712111618,
+      "pullRequestNo": 276
+    },
+    {
+      "name": "jcytong",
+      "id": 156466,
+      "comment_id": 2334987739,
+      "created_at": "2024-09-07T01:27:51Z",
+      "repoId": 712111618,
+      "pullRequestNo": 278
+    },
+    {
+      "name": "syldor",
+      "id": 4158062,
+      "comment_id": 2428114896,
+      "created_at": "2024-10-22T03:05:34Z",
+      "repoId": 712111618,
+      "pullRequestNo": 309
+    },
+    {
+      "name": "iammosespaulr",
+      "id": 28682735,
+      "comment_id": 2448054789,
+      "created_at": "2024-10-30T18:36:24Z",
+      "repoId": 712111618,
+      "pullRequestNo": 327
+    },
+    {
+      "name": "tjbck",
+      "id": 25473318,
+      "comment_id": 2454051305,
+      "created_at": "2024-11-04T08:09:46Z",
+      "repoId": 712111618,
+      "pullRequestNo": 332
     }
   ]
 }