cryogenic22 committed on
Commit
b95ce3f
·
verified ·
1 Parent(s): 3e665fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -52
app.py CHANGED
@@ -1,58 +1,32 @@
1
  import streamlit as st
2
- from docling.parsers import DOCXParser, PDFParser, PPTXParser
3
- from docling.utils import to_json, to_markdown
4
- import sys
5
- import traceback
 
 
6
 
7
- def init_parsers():
8
- parsers = {
9
- "pdf": PDFParser(),
10
- "docx": DOCXParser(),
11
- "pptx": PPTXParser()
12
- }
13
- return parsers
14
-
15
- def process_document(file, parser):
16
  try:
17
- document = parser.parse(file)
18
- return document, None
 
 
 
 
 
 
 
 
19
  except Exception as e:
20
- error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
21
- return None, error_msg
22
-
23
- def main():
24
- st.set_page_config(page_title="Document Processor")
25
- st.title("Docling Document Processor")
26
-
27
- try:
28
- parsers = init_parsers()
29
- except ImportError as e:
30
- st.error("Error: Required packages not installed. Please run: pip install docling python-docx pdfminer.six python-pptx")
31
- return
32
 
33
- uploaded_file = st.file_uploader("Choose a document", type=list(parsers.keys()))
34
-
35
- if uploaded_file:
36
- file_extension = uploaded_file.name.split(".")[-1].lower()
37
- parser = parsers.get(file_extension)
38
-
39
- document, error = process_document(uploaded_file, parser)
40
-
41
- if error:
42
- st.error(f"Failed to parse document:\n{error}")
43
- return
44
-
45
- output_format = st.radio("Select output format:", ("Markdown", "JSON"))
46
-
47
- try:
48
- if output_format == "Markdown":
49
- st.subheader("Markdown Output:")
50
- st.markdown(to_markdown(document))
51
- else:
52
- st.subheader("JSON Output:")
53
- st.json(to_json(document))
54
- except Exception as e:
55
- st.error(f"Error converting document: {str(e)}")
56
 
57
- if __name__ == "__main__":
58
- main()
 
 
 
 
 
 
1
  import streamlit as st
2
+ import logging
3
+ from pathlib import Path
4
+ from docling.document_converter import DocumentConverter
5
+ from docling.datamodel.base_models import InputFormat
6
+ from docling.datamodel.pipeline_options import PdfPipelineOptions
7
+ from docling.document_converter import PdfFormatOption
8
 
9
+ def test_docling():
 
 
 
 
 
 
 
 
10
  try:
11
+ pipeline_options = PdfPipelineOptions()
12
+ pipeline_options.do_ocr = False
13
+ pipeline_options.do_table_structure = True
14
+
15
+ converter = DocumentConverter(
16
+ format_options={
17
+ InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
18
+ }
19
+ )
20
+ return True, "Docling setup successful"
21
  except Exception as e:
22
+ return False, f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ st.title("Docling Test App")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ if st.button("Test Docling Setup"):
27
+ with st.spinner("Testing Docling installation..."):
28
+ success, message = test_docling()
29
+ if success:
30
+ st.success(message)
31
+ else:
32
+ st.error(message)