import gradio as gr
from transformers import pipeline, AutoTokenizer
import logging
import traceback
import sys
import requests
from bs4 import BeautifulSoup
import concurrent.futures
import time
from gtts import gTTS
import io
import base64

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize the summarizer with T5-small model
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
tokenizer = AutoTokenizer.from_pretrained("t5-small")


def fetch_content_from_url(url):
    """Fetch the visible paragraph text of a web page.

    Downloads *url* (10 s timeout), extracts the text of every <p>
    element, and returns the space-joined result capped at 10,000
    characters.

    Returns:
        str: the extracted text, or — on ANY failure (network error,
        HTTP 4xx/5xx, parse error) — an in-band error string prefixed
        with "Error fetching content:". Callers must check for that
        prefix; this function never raises.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # turn HTTP 4xx/5xx into exceptions
        soup = BeautifulSoup(response.content, 'html.parser')
        paragraphs = soup.find_all('p')
        content = ' '.join([p.get_text() for p in paragraphs])
        return content[:10000]  # Limit to first 10000 characters
    except Exception as e:
        # NOTE(review): errors are reported in-band as a string, so a
        # caller could mistake the message for page content — consider
        # raising instead. Kept as-is to preserve the existing contract.
        return f"Error fetching content: {str(e)}"


def chunk_text(text, max_chunk_size=200):
    """Split *text* into word chunks (presumably at most *max_chunk_size*
    words each — the loop that enforces this is not visible here).

    NOTE(review): SOURCE is truncated mid-function at this point; the
    remainder of the body is missing. The visible fragment is preserved
    verbatim below. The bare `current_size` expression is almost
    certainly `current_size = 0` in the full file — TODO confirm against
    the original before relying on this function.
    """
    words = text.split()[:1000]  # Limit to first 1000 words
    chunks = []
    current_chunk = []
    current_size