Bussiness-plan-make-3

Build error

App Files Files Community

SoDa12321 committed on Sep 3, 2024

Commit

65e936e

verified ·

1 Parent(s): f716c58

Create functions.py

Browse files

Files changed (1) hide show

functions.py +98 -0

functions.py ADDED Viewed

	@@ -0,0 +1,98 @@

+# functions.py
+import os
+import io
+import re
+import requests
+from docx import Document
+from newspaper import Article
+from langdetect import detect
+from sumy.parsers.plaintext import PlaintextParser
+from sumy.nlp.tokenizers import Tokenizer
+from sumy.summarizers.lsa import LsaSummarizer
+from transformers import pipeline
+import nltk
+# Download the NLTK 'punkt' data as a side effect of importing this module.
+# NOTE(review): presumably required by the sumy Tokenizer used in summarize_text - confirm.
+nltk.download('punkt')
+def extract_content_from_url(url):
+    """
+    Downloads and parses the page at `url` with newspaper's Article.
+    Returns a dict with the keys "title" and "text". Any failure is printed
+    and an entry with empty strings is returned instead of raising.
+    """
+    try:
+        page = Article(url)
+        page.download()
+        page.parse()
+        return {"title": page.title, "text": page.text}
+    except Exception as e:
+        # Best-effort: report the problem and hand back an empty result.
+        print(f"Error extracting content from URL: {e}")
+    return {"title": "", "text": ""}
+def summarize_text(text, num_sentences=5):
+    """
+    Summarizes the given text using the LSA summarizer from the Sumy library.
+    The language is guessed with langdetect; English is used when detection
+    fails or when no tokenizer can be loaded for the detected language.
+    Args:
+        text: Plain text to summarize.
+        num_sentences: Number of sentences requested from the summarizer.
+    Returns:
+        The selected sentences joined by single spaces, or "" when `text`
+        is empty or contains only whitespace.
+    """
+    # Nothing to summarize: skip language detection and tokenizer loading.
+    if not text or not text.strip():
+        return ""
+    try:
+        language = detect(text)
+    except Exception:
+        # Detection is best-effort; a bare `except:` would also swallow
+        # KeyboardInterrupt/SystemExit, so only ordinary errors are caught.
+        language = 'english'
+    try:
+        tokenizer = Tokenizer(language)
+    except LookupError:
+        # The detected language has no tokenizer available; fall back to the
+        # same default used when detection itself fails.
+        tokenizer = Tokenizer('english')
+    parser = PlaintextParser.from_string(text, tokenizer)
+    summarizer = LsaSummarizer()
+    summary = summarizer(parser.document, num_sentences)
+    return ' '.join(str(sentence) for sentence in summary)
+def clean_text(text):
+    """
+    Collapses every run of whitespace into a single space and trims the
+    leading and trailing whitespace of the result.
+    """
+    return re.sub(r'\s+', ' ', text).strip()
+# Loaded question-generation pipelines, keyed by model name, so the model is
+# built once instead of on every call.
+_QUESTION_GENERATORS = {}
+def generate_questions(summary, num_questions=3, model_name="valhalla/t5-small-e2e-qg"):
+    """
+    Generates questions based on the summarized text using an end-to-end
+    question-generation model run through a text2text-generation pipeline.
+    Args:
+        summary: Text the questions should be about.
+        num_questions: Maximum number of questions to return.
+        model_name: Hugging Face model id of the question-generation model.
+    Returns:
+        A list of at most `num_questions` question strings; an empty list
+        when `summary` is blank or `num_questions` is not positive.
+    """
+    if not summary or not summary.strip() or num_questions <= 0:
+        return []
+    generator = _QUESTION_GENERATORS.get(model_name)
+    if generator is None:
+        # 'e2e-qg' is not a task known to transformers.pipeline (it would
+        # raise for an unknown task), so the e2e-qg model is loaded through
+        # the generic text2text-generation task instead.
+        generator = pipeline("text2text-generation", model=model_name)
+        _QUESTION_GENERATORS[model_name] = generator
+    # The e2e-qg models expect this task prefix and emit the questions in one
+    # string separated by "<sep>".
+    outputs = generator(
+        f"generate questions: {summary}",
+        max_length=256,
+        num_beams=4,
+        length_penalty=1.5,
+        no_repeat_ngram_size=3,
+        early_stopping=True,
+    )
+    generated = outputs[0]["generated_text"]
+    candidates = [part.strip() for part in generated.split("<sep>")]
+    return [question for question in candidates if question][:num_questions]
+def strip_md(text):
+    """
+    Drops the markdown markers `**`, `*` and `#` from the text, then puts a
+    backslash in front of every remaining `!`, `_`, `=`, `~` or `-`.
+    The `*` in the escape pattern never matches because all asterisks have
+    already been removed by then.
+    """
+    for marker in ("**", "*", "#"):
+        text = text.replace(marker, "")
+    return re.sub(r'([!*_=~-])', r'\\\1', text)
+def create_document():
+    """
+    Builds a fresh Word document whose first element is the level-0 heading
+    "Business Proposal" and returns it.
+    """
+    document = Document()
+    document.add_heading("Business Proposal", level=0)
+    return document
+def add_section_to_doc(doc, section_name, section_content):
+    """
+    Appends a level-1 heading named `section_name` followed by a paragraph
+    holding `section_content` to the document, and returns the document.
+    The content is passed through strip_md first; every backslash is then
+    removed from the result before it is written.
+    """
+    # strip_md backslash-escapes some punctuation; drop all backslashes again.
+    body_text = strip_md(section_content).replace("\\", "")
+    doc.add_heading(section_name, level=1)
+    doc.add_paragraph(body_text)
+    return doc
+def get_docx_bytes(doc):
+    """
+    Saves the Word document into an in-memory buffer for downloading.
+    Returns the io.BytesIO object, rewound to position 0, not a raw bytes
+    value.
+    """
+    buffer = io.BytesIO()
+    doc.save(buffer)
+    # Rewind so the caller reads from the beginning of the saved data.
+    buffer.seek(0)
+    return buffer