egyorev committed on
Commit
feb7a08
·
verified ·
1 Parent(s): 4db40d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -94
app.py CHANGED
@@ -7,7 +7,7 @@ import requests
7
  from bs4 import BeautifulSoup
8
  import concurrent.futures
9
  import time
10
- import pyttsx3
11
  import io
12
  import base64
13
 
@@ -17,9 +17,6 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
17
  summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
18
  tokenizer = AutoTokenizer.from_pretrained("t5-small")
19
 
20
- # Initialize pyttsx3
21
- engine = pyttsx3.init()
22
-
23
  def fetch_content_from_url(url):
24
  try:
25
  response = requests.get(url, timeout=10)
@@ -35,93 +32,4 @@ def chunk_text(text, max_chunk_size=200):
35
  words = text.split()[:1000] # Limit to first 1000 words
36
  chunks = []
37
  current_chunk = []
38
- current_size = 0
39
- for word in words:
40
- if current_size + len(word) > max_chunk_size:
41
- chunks.append(' '.join(current_chunk))
42
- current_chunk = [word]
43
- current_size = len(word)
44
- else:
45
- current_chunk.append(word)
46
- current_size += len(word) + 1
47
- if current_chunk:
48
- chunks.append(' '.join(current_chunk))
49
- return chunks
50
-
51
- def summarize_chunk(chunk):
52
- try:
53
- summary = summarizer(chunk, max_length=50, min_length=10, do_sample=False)[0]['summary_text']
54
- return summary
55
- except Exception as e:
56
- logging.error(f"Error summarizing chunk: {str(e)}")
57
- return ""
58
-
59
- def text_to_speech(text):
60
- try:
61
- engine.save_to_file(text, 'summary.mp3')
62
- engine.runAndWait()
63
- with open("summary.mp3", "rb") as audio_file:
64
- audio_base64 = base64.b64encode(audio_file.read()).decode('utf-8')
65
- return f'data:audio/mp3;base64,{audio_base64}'
66
- except Exception as e:
67
- logging.error(f"Error in text-to-speech conversion: {str(e)}")
68
- return None
69
-
70
- def summarize_text(input_text, is_url):
71
- try:
72
- start_time = time.time()
73
- if is_url:
74
- content = fetch_content_from_url(input_text)
75
- if content.startswith("Error"):
76
- return content, None
77
- else:
78
- content = input_text[:10000] # Limit direct input to 10000 characters
79
-
80
- chunks = chunk_text(content)
81
- with concurrent.futures.ThreadPoolExecutor() as executor:
82
- summaries = list(executor.map(summarize_chunk, chunks))
83
-
84
- full_summary = " ".join(summaries)
85
-
86
- processing_time = time.time() - start_time
87
- summary_with_time = f"Summary (processed in {processing_time:.2f} seconds):\n\n{full_summary}"
88
-
89
- audio = text_to_speech(full_summary)
90
-
91
- return summary_with_time, audio
92
- except Exception as e:
93
- error_msg = f"An error occurred: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
94
- logging.error(error_msg)
95
- return error_msg, None
96
-
97
- def debug_info():
98
- return (
99
- f"Python version: {sys.version}\n"
100
- f"Summarizer model: T5-small\n"
101
- f"TTS Engine: pyttsx3\n"
102
- f"Current working directory: {os.getcwd()}\n"
103
- f"Contents of current directory: {os.listdir()}"
104
- )
105
-
106
- iface = gr.Interface(
107
- fn=summarize_text,
108
- inputs=[
109
- gr.Textbox(lines=5, label="Input Text or URL"),
110
- gr.Checkbox(label="Is URL?")
111
- ],
112
- outputs=[
113
- gr.Textbox(label="Summary", lines=10),
114
- gr.Audio(label="Audio Summary")
115
- ],
116
- title="Quick Text Summarizer with Speech",
117
- description="Enter text or a URL to summarize. Long texts will be truncated for quick processing."
118
- )
119
-
120
- debug_interface = gr.Interface(
121
- fn=debug_info,
122
- inputs=None,
123
- outputs="text",
124
- title="Debug Information"
125
- )
126
-
127
- gr.TabbedInterface([iface, debug_interface], ["Summarizer", "Debug Info"]).launch()
 
7
  from bs4 import BeautifulSoup
8
  import concurrent.futures
9
  import time
10
+ from gtts import gTTS
11
  import io
12
  import base64
13
 
 
17
  summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
18
  tokenizer = AutoTokenizer.from_pretrained("t5-small")
19
 
 
 
 
20
  def fetch_content_from_url(url):
21
  try:
22
  response = requests.get(url, timeout=10)
 
32
  words = text.split()[:1000] # Limit to first 1000 words
33
  chunks = []
34
  current_chunk = []
35
+ current_size