Spaces:
Runtime error
Runtime error
| from sumy.parsers.plaintext import PlaintextParser | |
| from sumy.nlp.tokenizers import Tokenizer | |
| from sumy.summarizers.lsa import LsaSummarizer | |
| from newspaper import Article | |
| import os | |
| import requests | |
| import gpt_2_simple as gpt2 | |
| import tensorflow as tf | |
| import nltk | |
| nltk.download('punkt') | |
def extract_course_information(course_design_variables):
    """Download a course web page and pull out its basic metadata.

    Args:
        course_design_variables: Mapping that must contain a ``"url"`` key
            holding the address of the course page.

    Returns:
        A ``(course_data, article)`` tuple. ``course_data`` is a dict with
        the keys ``course_title``, ``course_description``, ``authors``,
        ``publish_date`` and ``keywords``. A field the page does not provide
        is replaced by a human-readable placeholder string; when present,
        ``publish_date`` is stored exactly as ``article.publish_date``
        provides it (it is not converted to a string). ``article`` is the
        downloaded and parsed ``Article`` object.

    Raises:
        KeyError: If ``course_design_variables`` has no ``"url"`` entry.

    Errors raised by ``Article.download()`` or ``Article.parse()`` are not
    caught here and propagate to the caller.
    """
    # Retrieve course information from the web
    url = course_design_variables["url"]
    article = Article(url)
    article.download()
    article.parse()

    # newspaper only fills ``article.keywords`` once ``nlp()`` has run;
    # without this call the keywords field below would always be the
    # "not found" placeholder.
    try:
        article.nlp()
    except LookupError:
        # The NLTK tokenizer data needed for keyword extraction is missing.
        # Keyword extraction is best-effort, so keep going and let the
        # placeholder below stand in for the keywords.
        pass

    course_data = {
        # Extract course title
        'course_title': article.title or "Title not found on the page",
        # Extract course description
        'course_description': (
            article.text or "Description not found on the page"
        ),
        # Extract authors
        'authors': (
            ', '.join(article.authors)
            if article.authors
            else "Authors not found"
        ),
        # Extract publish date
        'publish_date': article.publish_date or "Publish date not found",
        # Extract keywords
        'keywords': (
            ', '.join(article.keywords)
            if article.keywords
            else "Keywords not found"
        ),
    }
    return course_data, article
| # Example usage: module-level code that calls extract_course_information (which downloads the page) for one hard-coded course URL | |
| course_url = "https://uwex.wisconsin.edu/sustainable-management/masters/" | |
| course_design_variables = {"url": course_url} | |
| # Extract course information; the call returns the field dict together with the parsed Article object | |
| course_data, article = extract_course_information(course_design_variables) | |
| if False: | |
| # Print the extracted information. NOTE(review): indentation was lost in this copy, so it is unclear how many of the following lines belong to the disabled `if False:` branch - confirm against the original file | |
| print("Course Title: ", course_data['course_title']) | |
| print("Course Description: ", course_data['course_description']) | |
| print("Authors: ", course_data['authors']) | |
| print("Publish Date: ", course_data['publish_date']) | |
| print("Keywords: ", course_data['keywords']) | |
| # Summarize the course description with Sumy's LSA summarizer (3 is passed as the sentence count) and print the result | |
| parser = PlaintextParser.from_string(course_data['course_description'], Tokenizer("english")) | |
| summarizer = LsaSummarizer() | |
| summary_sumy = summarizer(parser.document, 3) | |
| print("\nSumy Summary and remove the html content from this content :\n", summary_sumy) | |