ThePWDemo / app.py
egyorev's picture
Update app.py
feb7a08 verified
import gradio as gr
from transformers import pipeline, AutoTokenizer
import logging
import traceback
import sys
import requests
from bs4 import BeautifulSoup
import concurrent.futures
import time
from gtts import gTTS
import io
import base64
# Route log records through the root logger with a timestamped format at INFO level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Hugging Face checkpoint shared by the tokenizer and the summarization pipeline.
_MODEL_NAME = "t5-small"

# Load the tokenizer once and hand that same instance to the pipeline, instead of
# passing the checkpoint name and letting the pipeline load a second copy.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
# Initialize the summarizer with the T5-small model
summarizer = pipeline("summarization", model=_MODEL_NAME, tokenizer=tokenizer)
def fetch_content_from_url(url, timeout=10, max_chars=10000):
    """Fetch a web page and return the joined text of its ``<p>`` elements.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its timeout.
        max_chars: Maximum number of characters of extracted text to return;
            ``None`` disables the cap.

    Returns:
        The paragraph texts joined by single spaces and truncated to
        ``max_chars`` characters. On any failure, a string of the form
        ``"Error fetching content: <reason>"`` is returned instead of raising.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn HTTP 4xx/5xx responses into exceptions so they take the error path.
        response.raise_for_status()
        # Pass raw bytes so BeautifulSoup performs its own encoding detection.
        soup = BeautifulSoup(response.content, 'html.parser')
        content = ' '.join(p.get_text() for p in soup.find_all('p'))
        return content[:max_chars]
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary whose contract is an
        # in-band error string. Log the traceback so failures stay diagnosable.
        logging.exception("Failed to fetch content from %s", url)
        return f"Error fetching content: {str(e)}"
def chunk_text(text, max_chunk_size=200):
words = text.split()[:1000] # Limit to first 1000 words
chunks = []
current_chunk = []
current_size