pradeep4321 committed on
Commit
2b5b7f0
·
verified ·
1 Parent(s): ad487d7

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +25 -26
src/app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
- os.environ["STREAMLIT_CONFIG_DIR"] = "/src/.streamlit"
3
- os.makedirs("/src/.streamlit", exist_ok=True)
4
 
5
  import tempfile
6
  from gtts import gTTS
@@ -8,58 +8,57 @@ from docx import Document
8
  from PyPDF2 import PdfReader
9
  import streamlit as st
10
  import base64
 
11
 
12
- def text_to_speech(text, output_file):
13
  tts = gTTS(text)
14
- tts.save(output_file)
 
 
 
15
 
16
  def convert_docx_to_text(docx_file):
17
  doc = Document(docx_file)
18
  return "\n".join([p.text for p in doc.paragraphs])
19
 
20
  def convert_pdf_to_text(pdf_file):
21
- pdf_reader = PdfReader(pdf_file)
22
- return "\n".join([page.extract_text() or '' for page in pdf_reader.pages])
23
 
24
- def get_binary_file_downloader_html(bin_file, file_label='File'):
25
- b64 = base64.b64encode(bin_file).decode()
26
- href = f'<a href="data:application/octet-stream;base64,{b64}" download="{file_label}" target="_blank">Download {file_label}</a>'
27
  return href
28
 
29
  def main():
30
- st.title("Text to Speech Converter")
31
 
32
  uploaded_file = st.file_uploader("Upload a text, docx, or pdf file", type=["txt", "docx", "pdf"])
33
 
34
  if uploaded_file:
35
- with tempfile.NamedTemporaryFile(delete=False, suffix='.' + uploaded_file.name.split('.')[-1]) as tmp:
36
- tmp.write(uploaded_file.read())
37
- tmp_path = tmp.name
38
-
39
  ext = uploaded_file.name.split('.')[-1].lower()
40
  text = ""
41
 
42
  if ext == 'txt':
43
- with open(tmp_path, 'r', encoding='utf-8') as f:
44
- text = f.read()
45
  elif ext == 'docx':
46
- text = convert_docx_to_text(tmp_path)
47
  elif ext == 'pdf':
48
- text = convert_pdf_to_text(tmp_path)
 
 
 
49
 
50
  if not text.strip():
51
  st.warning("No readable text found.")
52
  return
53
 
54
- output_audio_path = os.path.join(tempfile.gettempdir(), "output.mp3")
55
- text_to_speech(text, output_audio_path)
56
-
57
- st.audio(output_audio_path, format="audio/mp3")
58
-
59
- with open(output_audio_path, 'rb') as audio_file:
60
- audio_bytes = audio_file.read()
61
 
62
- st.markdown(get_binary_file_downloader_html(audio_bytes, "output.mp3"), unsafe_allow_html=True)
 
63
 
64
  if __name__ == "__main__":
65
  main()
 
1
  import os
2
+ os.environ["STREAMLIT_CONFIG_DIR"] = "/tmp/.streamlit"
3
+ os.makedirs("/tmp/.streamlit", exist_ok=True)
4
 
5
  import tempfile
6
  from gtts import gTTS
 
8
  from PyPDF2 import PdfReader
9
  import streamlit as st
10
  import base64
11
+ from io import BytesIO
12
 
13
+ def text_to_speech(text):
14
  tts = gTTS(text)
15
+ audio_buffer = BytesIO()
16
+ tts.write_to_fp(audio_buffer)
17
+ audio_buffer.seek(0)
18
+ return audio_buffer
19
 
20
  def convert_docx_to_text(docx_file):
21
  doc = Document(docx_file)
22
  return "\n".join([p.text for p in doc.paragraphs])
23
 
24
  def convert_pdf_to_text(pdf_file):
25
+ reader = PdfReader(pdf_file)
26
+ return "\n".join([page.extract_text() or '' for page in reader.pages])
27
 
28
+ def get_download_link(audio_data, filename="output.mp3"):
29
+ b64 = base64.b64encode(audio_data).decode()
30
+ href = f'<a href="data:audio/mp3;base64,{b64}" download="{filename}">Download {filename}</a>'
31
  return href
32
 
33
  def main():
34
+ st.title("Text to Speech Converter (No File Write)")
35
 
36
  uploaded_file = st.file_uploader("Upload a text, docx, or pdf file", type=["txt", "docx", "pdf"])
37
 
38
  if uploaded_file:
 
 
 
 
39
  ext = uploaded_file.name.split('.')[-1].lower()
40
  text = ""
41
 
42
  if ext == 'txt':
43
+ text = uploaded_file.read().decode("utf-8")
 
44
  elif ext == 'docx':
45
+ text = convert_docx_to_text(uploaded_file)
46
  elif ext == 'pdf':
47
+ text = convert_pdf_to_text(uploaded_file)
48
+ else:
49
+ st.error("Unsupported file format")
50
+ return
51
 
52
  if not text.strip():
53
  st.warning("No readable text found.")
54
  return
55
 
56
+ with st.spinner("Converting to speech..."):
57
+ audio_buffer = text_to_speech(text)
58
+ audio_bytes = audio_buffer.read()
 
 
 
 
59
 
60
+ st.audio(audio_bytes, format="audio/mp3")
61
+ st.markdown(get_download_link(audio_bytes), unsafe_allow_html=True)
62
 
63
  if __name__ == "__main__":
64
  main()