cryogenic22 committed on
Commit
a5210ed
·
verified ·
1 Parent(s): 421eabd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front end that parses an uploaded document and renders it.

The user uploads a PDF, DOCX, or PPTX file; the script picks a parser from
the file extension, parses the upload, and shows the result as either
Markdown or JSON, depending on the selected radio option.
"""

import streamlit as st
# NOTE(review): the published docling documentation exposes
# `docling.document_converter.DocumentConverter`; confirm that
# `docling.parsers` and `docling.utils.to_json/to_markdown` actually resolve
# against the installed docling version.
from docling.parsers import DOCXParser, PDFParser, PPTXParser
from docling.utils import to_json, to_markdown

# Maps a lower-cased file extension to the parser class that handles it.
PARSERS_BY_EXTENSION = {
    "pdf": PDFParser,
    "docx": DOCXParser,
    "pptx": PPTXParser,
}

st.title("Docling Document Processor")

# File uploader
uploaded_file = st.file_uploader(
    "Choose a document (PDF, DOCX, PPTX)", type=["pdf", "docx", "pptx"]
)

if uploaded_file is not None:
    # Determine file type and pick the matching parser.
    file_extension = uploaded_file.name.split(".")[-1].lower()
    parser_cls = PARSERS_BY_EXTENSION.get(file_extension)
    if parser_cls is None:
        st.error("Unsupported file type.")
        # Halt this script run: without a parser the code below would
        # raise NameError instead of showing only the message above.
        st.stop()

    # Parse the document. The try body holds only the call that can fail.
    try:
        document = parser_cls().parse(uploaded_file)
    except Exception as e:  # top-level UI boundary: report, then halt
        st.error(f"Error parsing document: {e}")
        # Nothing to render when parsing failed; stopping here avoids a
        # follow-up NameError on the unbound `document`.
        st.stop()

    # Display output options
    output_format = st.radio("Select output format:", ("Markdown", "JSON"))

    if output_format == "Markdown":
        st.subheader("Markdown Output:")
        st.write(to_markdown(document))
    else:
        st.subheader("JSON Output:")
        st.json(to_json(document))