Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from haystack.nodes.connector import Crawler | |
| from haystack.nodes import TransformersSummarizer | |
| import validators | |
| import json | |
# Module-level objects shared by the crawl helper and the Streamlit UI below.

# Directory passed to the crawler as its output location.
output_dir = "./crawled_files"

# Crawler instance used by crawl_url_and_write_content().
crawler = Crawler(
    output_dir=output_dir,
)

# Summarizer built from the "google/pegasus-xsum" model.
summarizer = TransformersSummarizer(
    model_name_or_path="google/pegasus-xsum",
)

# Crawled content; written by crawl_url_and_write_content() and handed to
# summarizer.predict() when the user asks for a summary.
documents = []
def crawl_url_and_write_content(url):
    """Crawl ``url`` and load the result into the module-level ``documents``.

    The crawler is run with ``crawler_depth=0`` and
    ``overwrite_existing_files=True``. Every item it returns is opened and
    parsed as JSON, and the parsed objects replace whatever ``documents``
    held before the call.

    Args:
        url: Address of the page to crawl.
    """
    # Crawl the address the caller supplied instead of a fixed blog URL.
    crawled_files = crawler.crawl(
        urls=[url],
        crawler_depth=0,
        overwrite_existing_files=True,
    )

    loaded = []
    for crawled_file in crawled_files:
        # Context manager so the file handle is closed once parsing is done.
        with crawled_file.open() as handle:
            loaded.append(json.load(handle))

    # Slice assignment updates the shared list in place and, unlike
    # ``documents[0] = ...``, also works while the list is still empty.
    documents[:] = loaded
    # NOTE(review): the parsed JSON objects are stored as-is; confirm that
    # summarizer.predict() accepts them in this form -- TODO verify.
# Streamlit App
#
# Flow: show a title and a URL input, crawl the URL when it validates, and
# summarize the crawled content when the "Summarize" button is pressed.
st.title('Summarizer Demo with Haystack Summarizer')
url_text = st.text_input(
    "Please Enter a url here",
    value="https://www.rba.gov.au/media-releases/2022/mr-22-12.html",
)

if validators.url(url_text):
    crawl_url_and_write_content(url_text)
else:
    # Tell the user why nothing will be summarized instead of failing later.
    st.warning("Please enter a valid URL.")

summarize = st.button('Summarize')
if summarize:
    if documents:
        summary = summarizer.predict(documents=documents)
        st.write(summary)
    else:
        # Avoid calling the summarizer with an empty document list.
        st.warning("No content was crawled, so there is nothing to summarize.")