taprosoft committed on
Commit
d7bf9d0
·
unverified ·
1 Parent(s): 0279c26

fix: remove sycamore

Browse files
Files changed (2) hide show
  1. app.py +5 -2
  2. requirements.txt +0 -1
app.py CHANGED
@@ -10,11 +10,15 @@ import pymupdf
10
  import gradio as gr
11
  from chunking.base import CType
12
  from chunking.controller import Controller
13
- from chunking.parser import DoclingPDF, FastPDF, SycamorePDF, UnstructuredPDF
14
  from chunking.parser.fastpdf.util import OCRMode, bytes_to_base64
15
  from chunking.split.toc_builder import TOCBuilder
16
  from chunking.util.plot import plot_img, plot_pdf
17
 
 
 
 
 
18
  MAX_PAGES = os.getenv("MAX_PAGES", 10)
19
  MAX_PAGES_CHUNKING = os.getenv("MAX_PAGES", 50)
20
  MODEL_NAME = os.getenv("MODEL_NAME", "gemini-2.0-flash")
@@ -22,7 +26,6 @@ METHOD_MAP = {
22
  "chunking_fastpdf": FastPDF,
23
  "unstructured": UnstructuredPDF,
24
  "docling": DoclingPDF,
25
- "sycamore": SycamorePDF,
26
  }
27
  METHOD_LIST = list(METHOD_MAP.keys())
28
  TMP_DIR = Path("/tmp/visualize")
 
10
  import gradio as gr
11
  from chunking.base import CType
12
  from chunking.controller import Controller
13
+ from chunking.parser import DoclingPDF, FastPDF, UnstructuredPDF
14
  from chunking.parser.fastpdf.util import OCRMode, bytes_to_base64
15
  from chunking.split.toc_builder import TOCBuilder
16
  from chunking.util.plot import plot_img, plot_pdf
17
 
18
+ # install Poppler
19
+ os.system("apt-get update")
20
+ os.system("apt-get install -y poppler-utils tesseract-ocr")
21
+
22
  MAX_PAGES = os.getenv("MAX_PAGES", 10)
23
  MAX_PAGES_CHUNKING = os.getenv("MAX_PAGES", 50)
24
  MODEL_NAME = os.getenv("MODEL_NAME", "gemini-2.0-flash")
 
26
  "chunking_fastpdf": FastPDF,
27
  "unstructured": UnstructuredPDF,
28
  "docling": DoclingPDF,
 
29
  }
30
  METHOD_LIST = list(METHOD_MAP.keys())
31
  TMP_DIR = Path("/tmp/visualize")
requirements.txt CHANGED
@@ -2,4 +2,3 @@ pymupdf
2
  git+https://github.com/chunking-ai/chunking.git@main
3
  docling==2.32.0
4
  unstructured[pdf]
5
- sycamore-ai[local-inference]
 
2
  git+https://github.com/chunking-ai/chunking.git@main
3
  docling==2.32.0
4
  unstructured[pdf]