import warnings
import torch
import gradio
from transformers import pipeline
from bs4 import BeautifulSoup
import requests

# Load the summarization model once at import time so it is reused across calls.
summarizer = pipeline("summarization", model="stevhliu/my_awesome_billsum_model")

# Example article to summarize, and the maximum number of words per chunk
# passed to the model in a single call.
url = "https://medium.com/analytics-vidhya/openai-gpt-3-language-models-are-few-shot-learners-82531b3d3122"
max_chunk = 500

def SUMMARIZE(Url):
    # Fetch the page and pull the text out of its headings and paragraphs.
    r = requests.get(Url)
    soup = BeautifulSoup(r.text, 'html.parser')
    results = soup.find_all(['h1', 'p'])
    text = [result.text for result in results]
    Article = ' '.join(text)

    # Mark sentence boundaries so the article can be split into sentences
    # rather than individual words.
    Article = Article.replace('.', '.<eos>')
    Article = Article.replace('?', '?<eos>')
    Article = Article.replace('!', '!<eos>')
    sentences = Article.split('<eos>')

    current_chunk = 0
    chunks = []
    for sentence in sentences:
        if len(chunks) == current_chunk + 1:
            # Add the sentence to the current chunk if it still fits, otherwise start a new chunk.
            if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
                chunks[current_chunk].extend(sentence.split(' '))
            else:
                current_chunk += 1
                chunks.append(sentence.split(' '))
        else:
            # The very first sentence starts the first chunk.
            chunks.append(sentence.split(' '))

    # Rejoin each chunk's words, summarize every chunk, and return the combined summary text.
    for chunk_id in range(len(chunks)):
        chunks[chunk_id] = ' '.join(chunks[chunk_id])
    res = summarizer(chunks, max_length=120, min_length=30, do_sample=False)
    return ' '.join(piece['summary_text'] for piece in res)
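
# Usage sketch (an assumption, not part of the original script): the gradio import above is
# never used, so the interface below only illustrates how SUMMARIZE could be exposed as a
# simple textbox-in / textbox-out app, with the Medium article URL defined earlier as the
# default input.
interface = gradio.Interface(
    fn=SUMMARIZE,
    inputs=gradio.Textbox(label="Article URL", value=url),
    outputs=gradio.Textbox(label="Summary"),
)

if __name__ == "__main__":
    interface.launch()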