File size: 1,114 Bytes
484f683 09aef7c f9db6c6 745c3e0 f147862 da0397f c2b73fe feb7a08 8d79195 484f683 745c3e0 0eb8dff c2b73fe 09aef7c f147862 09aef7c c2b73fe f147862 09aef7c f147862 c2b73fe 09aef7c feb7a08 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | import gradio as gr
from transformers import pipeline, AutoTokenizer
import logging
import traceback
import sys
import requests
from bs4 import BeautifulSoup
import concurrent.futures
import time
from gtts import gTTS
import io
import base64
# Configure the root logger: timestamped INFO-level messages.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Initialize the summarizer with T5-small model
# NOTE(review): pipeline() downloads model weights on first use, so module import
# can block on the network — confirm this is acceptable at startup.
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
# Standalone tokenizer for the same model — presumably used later for token
# counting / chunk sizing; verify against the rest of the file.
tokenizer = AutoTokenizer.from_pretrained("t5-small")
def fetch_content_from_url(url):
    """Download a web page and return the concatenated text of its <p> tags.

    Args:
        url: Address of the page to fetch.

    Returns:
        Up to the first 10,000 characters of paragraph text, or an
        error-message string if the request or parsing fails (this
        function never raises).
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        page = BeautifulSoup(response.content, 'html.parser')
        # Join the text of every paragraph into one space-separated string.
        texts = [tag.get_text() for tag in page.find_all('p')]
        combined = ' '.join(texts)
        # Limit to first 10000 characters so downstream work stays bounded.
        return combined[:10000]
    except Exception as e:
        # Best-effort contract: report failure as a string instead of raising.
        return f"Error fetching content: {str(e)}"
def chunk_text(text, max_chunk_size=200):
words = text.split()[:1000] # Limit to first 1000 words
chunks = []
current_chunk = []
current_size
|