pradeep4321 committed on
Commit
dc7b22f
·
verified ·
1 Parent(s): 36d7981

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +34 -46
src/app.py CHANGED
@@ -1,4 +1,7 @@
1
  import os
 
 
 
2
  import tempfile
3
  from gtts import gTTS
4
  from docx import Document
@@ -12,23 +15,14 @@ def text_to_speech(text, output_file):
12
 
13
  def convert_docx_to_text(docx_file):
14
  doc = Document(docx_file)
15
- text = ""
16
- for paragraph in doc.paragraphs:
17
- text += paragraph.text + "\n"
18
- return text
19
 
20
  def convert_pdf_to_text(pdf_file):
21
- text = ""
22
  pdf_reader = PdfReader(pdf_file)
23
- for page in pdf_reader.pages:
24
- page_text = page.extract_text()
25
- if page_text:
26
- text += page_text
27
- return text
28
 
29
  def get_binary_file_downloader_html(bin_file, file_label='File'):
30
- data = bin_file
31
- b64 = base64.b64encode(data).decode()
32
  href = f'<a href="data:application/octet-stream;base64,{b64}" download="{file_label}" target="_blank">Download {file_label}</a>'
33
  return href
34
 
@@ -38,40 +32,34 @@ def main():
38
  uploaded_file = st.file_uploader("Upload a text, docx, or pdf file", type=["txt", "docx", "pdf"])
39
 
40
  if uploaded_file:
41
- try:
42
- with tempfile.NamedTemporaryFile(delete=False, suffix='.' + uploaded_file.name.split('.')[-1]) as temp_file:
43
- temp_file.write(uploaded_file.read())
44
- temp_file_path = temp_file.name
45
-
46
- file_extension = uploaded_file.name.split('.')[-1].lower()
47
-
48
- if file_extension == 'txt':
49
- with open(temp_file_path, 'r', encoding='utf-8') as f:
50
- text = f.read()
51
- elif file_extension == 'docx':
52
- text = convert_docx_to_text(temp_file_path)
53
- elif file_extension == 'pdf':
54
- text = convert_pdf_to_text(temp_file_path)
55
- else:
56
- st.error("Unsupported file format")
57
- return
58
-
59
- if not text.strip():
60
- st.warning("No text found in the file.")
61
- return
62
-
63
- with st.spinner("Converting text to speech..."):
64
- output_audio_file = os.path.join(tempfile.gettempdir(), "output.mp3")
65
- text_to_speech(text, output_audio_file)
66
-
67
- st.audio(output_audio_file, format="audio/mp3")
68
-
69
- with open(output_audio_file, 'rb') as f:
70
- audio_bytes = f.read()
71
- st.markdown(get_binary_file_downloader_html(audio_bytes, "output.mp3"), unsafe_allow_html=True)
72
-
73
- except Exception as e:
74
- st.error(f"An error occurred: {e}")
75
 
76
  if __name__ == "__main__":
77
  main()
 
1
  import os
2
+ os.environ["STREAMLIT_CONFIG_DIR"] = "/tmp/.streamlit"
3
+ os.makedirs("/tmp/.streamlit", exist_ok=True)
4
+
5
  import tempfile
6
  from gtts import gTTS
7
  from docx import Document
 
15
 
16
  def convert_docx_to_text(docx_file):
17
  doc = Document(docx_file)
18
+ return "\n".join([p.text for p in doc.paragraphs])
 
 
 
19
 
20
  def convert_pdf_to_text(pdf_file):
 
21
  pdf_reader = PdfReader(pdf_file)
22
+ return "\n".join([page.extract_text() or '' for page in pdf_reader.pages])
 
 
 
 
23
 
24
  def get_binary_file_downloader_html(bin_file, file_label='File'):
25
+ b64 = base64.b64encode(bin_file).decode()
 
26
  href = f'<a href="data:application/octet-stream;base64,{b64}" download="{file_label}" target="_blank">Download {file_label}</a>'
27
  return href
28
 
 
32
  uploaded_file = st.file_uploader("Upload a text, docx, or pdf file", type=["txt", "docx", "pdf"])
33
 
34
  if uploaded_file:
35
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.' + uploaded_file.name.split('.')[-1]) as tmp:
36
+ tmp.write(uploaded_file.read())
37
+ tmp_path = tmp.name
38
+
39
+ ext = uploaded_file.name.split('.')[-1].lower()
40
+ text = ""
41
+
42
+ if ext == 'txt':
43
+ with open(tmp_path, 'r', encoding='utf-8') as f:
44
+ text = f.read()
45
+ elif ext == 'docx':
46
+ text = convert_docx_to_text(tmp_path)
47
+ elif ext == 'pdf':
48
+ text = convert_pdf_to_text(tmp_path)
49
+
50
+ if not text.strip():
51
+ st.warning("No readable text found.")
52
+ return
53
+
54
+ output_audio_path = os.path.join(tempfile.gettempdir(), "output.mp3")
55
+ text_to_speech(text, output_audio_path)
56
+
57
+ st.audio(output_audio_path, format="audio/mp3")
58
+
59
+ with open(output_audio_path, 'rb') as audio_file:
60
+ audio_bytes = audio_file.read()
61
+
62
+ st.markdown(get_binary_file_downloader_html(audio_bytes, "output.mp3"), unsafe_allow_html=True)
 
 
 
 
 
 
63
 
64
  if __name__ == "__main__":
65
  main()