Spaces:
Runtime error
Runtime error
enstazao
committed on
Commit
·
ae85af3
1
Parent(s):
50eb7ef
added lines for debug
Browse files
main.py
CHANGED
|
@@ -7,6 +7,7 @@ def fetch_webpage_content(url):
|
|
| 7 |
try:
|
| 8 |
response = requests.get(url, timeout=10) # Ensures the use of standard HTTP/HTTPS ports
|
| 9 |
response.raise_for_status() # Raises an error for bad responses
|
|
|
|
| 10 |
return response.text
|
| 11 |
except requests.exceptions.RequestException as e:
|
| 12 |
print(f"Error fetching the webpage: {e}")
|
|
@@ -22,7 +23,7 @@ def parse_and_segment_content(html_content):
|
|
| 22 |
text = ' '.join([result.text for result in results])
|
| 23 |
text = text.replace('.', '.<eos>').replace('!', '!<eos>').replace('?', '?<eos>')
|
| 24 |
sentences = text.split('<eos>')
|
| 25 |
-
|
| 26 |
max_chunk = 500
|
| 27 |
chunks = []
|
| 28 |
current_chunk = -1
|
|
@@ -45,7 +46,7 @@ def summarize_text(chunks):
|
|
| 45 |
|
| 46 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
| 47 |
summaries = []
|
| 48 |
-
|
| 49 |
for chunk in chunks:
|
| 50 |
try:
|
| 51 |
summary = summarizer(chunk, max_length=50, min_length=30, do_sample=False)
|
|
|
|
| 7 |
try:
|
| 8 |
response = requests.get(url, timeout=10) # Ensures the use of standard HTTP/HTTPS ports
|
| 9 |
response.raise_for_status() # Raises an error for bad responses
|
| 10 |
+
print("Hello", response.text)
|
| 11 |
return response.text
|
| 12 |
except requests.exceptions.RequestException as e:
|
| 13 |
print(f"Error fetching the webpage: {e}")
|
|
|
|
| 23 |
text = ' '.join([result.text for result in results])
|
| 24 |
text = text.replace('.', '.<eos>').replace('!', '!<eos>').replace('?', '?<eos>')
|
| 25 |
sentences = text.split('<eos>')
|
| 26 |
+
print("Doing segmentation")
|
| 27 |
max_chunk = 500
|
| 28 |
chunks = []
|
| 29 |
current_chunk = -1
|
|
|
|
| 46 |
|
| 47 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
| 48 |
summaries = []
|
| 49 |
+
print("Summarizing content")
|
| 50 |
for chunk in chunks:
|
| 51 |
try:
|
| 52 |
summary = summarizer(chunk, max_length=50, min_length=30, do_sample=False)
|