Bussiness-plan-make-3

Build error

App Files Files Community

Bussiness-plan-make-3 / functions.py

SoDa12321

Create functions.py

65e936e verified over 1 year ago

raw

history blame contribute delete

2.74 kB

	# functions.py

	import os
	import io
	import re
	import requests
	from docx import Document
	from newspaper import Article
	from langdetect import detect
	from sumy.parsers.plaintext import PlaintextParser
	from sumy.nlp.tokenizers import Tokenizer
	from sumy.summarizers.lsa import LsaSummarizer
	from transformers import pipeline
	import nltk

	# Fetch NLTK's 'punkt' resource as a side effect of importing this module.
	# NOTE(review): presumably needed by the sumy Tokenizer used in
	# summarize_text — confirm before moving or removing this call.
	nltk.download('punkt')

	def extract_content_from_url(url):
	    """
	    Download and parse the page at ``url`` with newspaper's ``Article``.

	    Returns a dict with the keys ``"title"`` and ``"text"``. If any step
	    raises, the error is printed and both values are empty strings.
	    """
	    try:
	        page = Article(url)
	        page.download()
	        page.parse()
	        extracted = {"title": page.title, "text": page.text}
	    except Exception as e:
	        # Best-effort contract: report the failure and hand back an empty result.
	        print(f"Error extracting content from URL: {e}")
	        return {"title": "", "text": ""}
	    return extracted

	def summarize_text(text, num_sentences=5):
	    """
	    Summarize ``text`` with the LSA summarizer from the Sumy library.

	    The language is guessed with ``langdetect.detect``. English is used when
	    detection fails or when no tokenizer can be built for the detected
	    language.

	    Args:
	        text: Plain text to summarize.
	        num_sentences: Number of sentences requested from the summarizer.

	    Returns:
	        The selected sentences joined by single spaces, or ``""`` when
	        ``text`` is empty, ``None`` or only whitespace.
	    """
	    # Nothing to summarize: skip language detection and tokenizer setup.
	    if not text or not text.strip():
	        return ""

	    try:
	        language = detect(text)
	    except Exception:
	        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
	        # still propagate; any detection failure falls back to English.
	        language = 'english'

	    try:
	        tokenizer = Tokenizer(language)
	    except LookupError:
	        # The detected language has no tokenizer available; degrade to
	        # English instead of aborting the whole summary.
	        tokenizer = Tokenizer('english')

	    parser = PlaintextParser.from_string(text, tokenizer)
	    summary = LsaSummarizer()(parser.document, num_sentences)
	    return ' '.join(str(sentence) for sentence in summary)

	def clean_text(text):
	    """
	    Collapse each run of whitespace in ``text`` into a single space and
	    trim leading and trailing whitespace from the result.
	    """
	    return re.sub(r'\s+', ' ', text).strip()

	def generate_questions(summary, num_questions=3):
	    """
	    Generate questions from ``summary`` using an 'e2e-qg' pipeline.

	    A new pipeline is built on every call and invoked with ``summary`` and
	    ``max_questions=num_questions``; whatever it returns is passed back
	    unchanged.
	    """
	    # NOTE(review): `pipeline` comes from transformers in this file, and
	    # 'e2e-qg' does not look like one of its built-in task names — confirm
	    # this actually resolves at runtime.
	    # NOTE(review): the pipeline is rebuilt per call; consider reusing one
	    # instance if construction turns out to be slow.
	    return pipeline('e2e-qg')(summary, max_questions=num_questions)

	def strip_md(text):
	    """
	    Remove common markdown markers from ``text`` and escape the rest.

	    Asterisks, backticks and hash signs are deleted outright. Every
	    remaining ``!``, ``_``, ``=``, ``~`` or ``-`` is then prefixed with a
	    backslash.

	    Args:
	        text: The string to process.

	    Returns:
	        The processed string.
	    """
	    # Delete emphasis (*), code-span (`) and heading (#) markers in one pass.
	    # The previous replace chain contained replace("", ""), which is a
	    # no-op, so backticks were never actually removed.
	    text = text.translate(str.maketrans("", "", "*`#"))
	    # Backslash-escape the remaining markdown-significant characters.
	    return re.sub(r'([!*_=~-])', r'\\\1', text)

	def create_document():
	    """
	    Build a fresh ``Document`` whose first element is the level-0 heading
	    "Business Proposal", and return it.
	    """
	    proposal = Document()
	    proposal.add_heading("Business Proposal", level=0)
	    return proposal

	def add_section_to_doc(doc, section_name, section_content):
	    """
	    Append a level-1 heading and one paragraph to ``doc``, then return ``doc``.

	    ``section_content`` is passed through ``strip_md`` and has every
	    backslash removed before it is written as the paragraph text.
	    """
	    # Removing backslashes also drops the escapes that strip_md inserts.
	    paragraph_text = strip_md(section_content).replace("\\", "")
	    doc.add_heading(section_name, level=1)
	    doc.add_paragraph(paragraph_text)
	    return doc

	def get_docx_bytes(doc):
	    """
	    Save ``doc`` into an in-memory buffer suitable for downloading.

	    Returns the ``io.BytesIO`` object itself (not a ``bytes`` value),
	    rewound to position 0 so it can be read from the start.
	    """
	    buffer = io.BytesIO()
	    doc.save(buffer)
	    buffer.seek(0)
	    return buffer