Spaces:

argodevops
/

ai-summarisation

Sleeping

ferguch9 committed on Apr 1, 2024

Commit

19977fc

1 Parent(s): 20c1d8c

feat: support for word docs

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import streamlit as st
 import os
 import PyPDF2
@@ -32,6 +33,12 @@ def load_pdf_file(file):
     return pdf_text
 def split_text_into_chunks(text, max_chunk_length):
     chunks = []
     current_chunk = ""
@@ -84,6 +91,8 @@ def main():
             _, file_ext = os.path.splitext(file_name)
             if "pdf" in file_ext:
                 sentence = load_pdf_file(uploaded_file)
             else:
                 sentence = load_text_file(uploaded_file)
         st.write(f"{len(sentence)} characters and {len(sentence.split())} words")

+import docx
 import streamlit as st
 import os
 import PyPDF2
     return pdf_text
+def load_word_file(file):
+    doc = docx.Document(file)
+    paragraphs = [p.text for p in doc.paragraphs]
+    return "\n".join(paragraphs)
 def split_text_into_chunks(text, max_chunk_length):
     chunks = []
     current_chunk = ""
             _, file_ext = os.path.splitext(file_name)
             if "pdf" in file_ext:
                 sentence = load_pdf_file(uploaded_file)
+            elif "docx" in file_ext:
+                sentence = load_word_file(uploaded_file)
             else:
                 sentence = load_text_file(uploaded_file)
         st.write(f"{len(sentence)} characters and {len(sentence.split())} words")

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 streamlit
 #tensorflow
 #tf-keras

+python-docx
 streamlit
 #tensorflow
 #tf-keras