Bussiness-plan-make-3 / app_url_problem.py
SoDa12321's picture
Rename app_url_probl# functions.py import os import io import re import requests from docx import Document from newspaper import Article from langdetect import detect from sumy.parsers.plaintext import PlaintextParser from sumy.nlp.tokenizers import Tokenizer from sumy.summarizers.lsa import LsaSummarizer from transformers import pipeline import nltk nltk.download('punkt') def extract_content_from_url(url): """ Extracts the title and text content from a given URL using the newspaper3k library. """ try: article = Article(url) article.download() article.parse() title = article.title text = article.text return {"title": title, "text": text} except Exception as e: print(f"Error extracting content from URL: {e}") return {"title": "", "text": ""} def summarize_text(text, num_sentences=5): """ Summarizes the given text using the LSA summarizer from the Sumy library. """ try: language = detect(text) except: language = 'english' parser = PlaintextParser.from_string(text, Tokenizer(language)) summarizer = LsaSummarizer() summary = summarizer(parser.document, num_sentences) summarized_text = ' '.join([str(sentence) for sentence in summary]) return summarized_text def clean_text(text): """ Cleans the text by removing unwanted characters and formatting. """ text = re.sub(r'\s+', ' ', text) text = text.strip() return text def generate_questions(summary, num_questions=3): """ Generates questions based on the summarized text using a question-generation pipeline. """ question_generator = pipeline('e2e-qg') questions = question_generator(summary, max_questions=num_questions) return questions def strip_md(text): """ Removes markdown formatting from the text. """ text = text.replace("**", "").replace("*", "").replace("#", "") text = re.sub(r'([!*_=~-])', r'\\\1', text) return text def create_document(): """ Creates a new Word document with a predefined heading. 
""" doc = Document() doc.add_heading("Business Proposal", 0) return doc def add_section_to_doc(doc, section_name, section_content): """ Adds a new section with the given name and content to the Word document. """ section_content = strip_md(section_content) section_content = section_content.replace("\\", "") # Remove backslashes doc.add_heading(section_name, level=1) doc.add_paragraph(section_content) return doc def get_docx_bytes(doc): """ Converts the Word document to bytes for downloading. """ doc_io = io.BytesIO() doc.save(doc_io) doc_io.seek(0) return doc_io to app_url_problem.py
472e45b verified
import streamlit as st
from docx import Document
import re
import io
import os
from fpdf import FPDF
from groq import Groq
from exa_py import Exa
from dotenv import load_dotenv
from retrying import retry
from business_plan_functions import *
import json
# Read settings (including the API keys below) from a local .env file.
load_dotenv()

# Exa search API client, authenticated with EXA_API_KEY.
exa = Exa(api_key=os.environ.get("EXA_API_KEY"))

# Groq API client, authenticated with GROQ_API_KEY, and the model name to use.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"
def strip_md(text):
    """Drop markdown emphasis/heading markers and escape what remains.

    Every ``*`` and ``#`` is removed outright; each remaining ``!``, ``_``,
    ``=``, ``~`` or ``-`` is then prefixed with a single backslash.
    """
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)
def create_document():
    """Create a new Word document that opens with the proposal heading.

    Returns:
        A fresh ``Document`` whose only content is a level-0 heading reading
        "Business Proposal".
    """
    proposal = Document()
    proposal.add_heading("Business Proposal", 0)
    return proposal
def add_section_to_doc(doc, section_name, section_content):
    """Append a titled section to the Word document and return the document.

    Args:
        doc: Document to extend; it is modified in place.
        section_name: Text of the level-1 heading for the section.
        section_content: Section body, possibly containing markdown markers.

    Returns:
        The same ``doc`` object that was passed in.
    """
    # strip_md backslash-escapes leftover markdown punctuation; those escapes
    # (and any other backslashes in the text) are dropped again right here.
    plain_body = strip_md(section_content).replace("\\", "")
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(plain_body)
    return doc
def get_docx_bytes(doc):
    """Serialize the document into an in-memory binary stream.

    Args:
        doc: Object exposing ``save(stream)``, such as a Word document.

    Returns:
        An ``io.BytesIO`` holding the saved document, positioned at offset 0
        so that it can be read from the start.
    """
    buffer = io.BytesIO()
    doc.save(buffer)
    # Saving leaves the cursor at the end; rewind for the next reader.
    buffer.seek(0, io.SEEK_SET)
    return buffer
# Session-state slot holding the (section_name, section_content) pairs that
# were generated by the most recent successful submission.
_GENERATED_SECTIONS_KEY = "generated_proposal_sections"


def _download_file_name(section_name):
    """Return a lowercase, filesystem-friendly ``.docx`` name for a section."""
    # Collapse anything that is not a letter, digit or hyphen (spaces, "'",
    # "&", ...) into single underscores so the name is safe on any platform.
    slug = re.sub(r"[^a-z0-9-]+", "_", section_name.lower()).strip("_")
    return f"{slug or 'section'}.docx"


def _render_section(doc, section_name, section_content):
    """Show one section, append it to ``doc`` and offer the document for download."""
    st.subheader(section_name)
    st.write(section_content)
    # The document is cumulative: each download contains every section that
    # has been added so far, ending with this one.
    doc = add_section_to_doc(doc, section_name, section_content)
    st.download_button(
        label=f"Download {section_name} as DOCX",
        data=get_docx_bytes(doc),
        file_name=_download_file_name(section_name),
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    return doc


def collect_basic_info():
    """Render the proposal generator page and build the proposal from a URL.

    The user enters a company website URL and presses Submit. Business data is
    then collected with ``collect_business_info`` and, for every proposal
    section, a prompt is built from that data and sent to ``call_llm``
    (both helpers are presumably provided by the ``business_plan_functions``
    star import; they are not defined in this file). Each section is displayed
    as soon as it is generated, together with a download button for the
    document built so far.

    Generated sections are kept in ``st.session_state``: pressing a download
    button reruns the script with the Submit button reporting ``False``, and
    without the saved copy every section would vanish and have to be
    regenerated.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.title("Business Proposal Generator")

    # Get the URL from the user
    url = st.text_input("Enter the URL of the company's website")
    cleaned_url = (url or "").strip()

    submitted = st.button('Submit')
    if submitted and not cleaned_url:
        # Nothing to fetch; tell the user instead of attempting a collection
        # that cannot succeed, then fall through to any earlier results.
        st.warning("Please enter the company's website URL before submitting.")
        submitted = False

    if not submitted:
        # Rerun without a new submission (e.g. after a download click):
        # show again whatever the last successful submission produced.
        saved_sections = st.session_state.get(_GENERATED_SECTIONS_KEY)
        if saved_sections:
            doc = create_document()
            for section_name, section_content in saved_sections:
                doc = _render_section(doc, section_name, section_content)
        return

    # A new submission replaces the results of any previous one.
    st.session_state.pop(_GENERATED_SECTIONS_KEY, None)

    # Collect data from the provided URL
    data = collect_business_info(cleaned_url)
    if not (data and isinstance(data, dict)):
        st.error("Failed to collect business information from the provided URL.")
        return

    # Section title paired with the function that builds its LLM prompt.
    sections_to_process = [
        ("Executive Summary", generate_executive_summary),
        ("Mission Statement", generate_mission),
        ("Vision Statement", generate_vision),
        ("Objectives", generate_objectives),
        ("Core Values", generate_core_values),
        ("Business Description Analysis", generate_business_description),
        ("Company Location", generate_company_location),
        ("Products", generate_products),
        ("Ownership", generate_ownership),
        ("Company Structure", generate_company_structure),
        ("Management Profiles", generate_management_profiles),
        ("Operational Strategy", generate_operational_strategy),
        ("Marketing Mix Strategy", generate_marketing_mix),
        ("Promotional Strategy", generate_promotional_strategy),
        ("Market Demand Analysis", analyze_demand),
        ("Market Segment Analysis", segment_market),
        ("Competitor Analysis", analyze_competitors),
        ("Porter's Five Forces Analysis", perform_porters_five_forces),
        ("Industry Analysis", analyze_industry_accommodation),
        ("Major Player Analysis", list_major_players),
        ("Business Sub Sector Analysis", analyze_business_sub_sector),
        ("SWOT Analysis", generate_swot_analysis),
        ("Funding Request", generate_funding_request),
        ("Financing & Bank Loan Amortization", create_financing_plan),
        ("Income Statement Analysis", generate_pro_forma_income_statement),
        ("Revenue Expense Analysis", predict_revenue_expenses),
        ("Monthly Cash Flow Analysis", generate_monthly_cash_flow),
        ("Pro Forma Annual Cash Flow Analysis", generate_pro_forma_annual_cash_flow),
        ("Pro Forma Balance Sheet Analysis", generate_pro_forma_balance_sheet),
        ("Break-Even Analysis", perform_break_even_analysis),
        ("Payback Period Analysis", calculate_payback_period),
        ("Financial Graphs Analysis", generate_financial_graphs),
        ("Risk Mitigations Analysis", identify_risks_mitigations),
    ]

    # Create a new document
    doc = create_document()

    # Register the list up front and append to it as sections arrive, so a
    # run that is interrupted part-way still keeps the finished sections.
    generated_sections = []
    st.session_state[_GENERATED_SECTIONS_KEY] = generated_sections

    for section_name, generate_prompt_func in sections_to_process:
        prompt = generate_prompt_func(data)
        section_content = call_llm(prompt)
        generated_sections.append((section_name, section_content))
        doc = _render_section(doc, section_name, section_content)
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    collect_basic_info()