Spaces:
Build error
Build error
Rename app_url_probl# functions.py import os import io import re import requests from docx import Document from newspaper import Article from langdetect import detect from sumy.parsers.plaintext import PlaintextParser from sumy.nlp.tokenizers import Tokenizer from sumy.summarizers.lsa import LsaSummarizer from transformers import pipeline import nltk nltk.download('punkt') def extract_content_from_url(url): """ Extracts the title and text content from a given URL using the newspaper3k library. """ try: article = Article(url) article.download() article.parse() title = article.title text = article.text return {"title": title, "text": text} except Exception as e: print(f"Error extracting content from URL: {e}") return {"title": "", "text": ""} def summarize_text(text, num_sentences=5): """ Summarizes the given text using the LSA summarizer from the Sumy library. """ try: language = detect(text) except: language = 'english' parser = PlaintextParser.from_string(text, Tokenizer(language)) summarizer = LsaSummarizer() summary = summarizer(parser.document, num_sentences) summarized_text = ' '.join([str(sentence) for sentence in summary]) return summarized_text def clean_text(text): """ Cleans the text by removing unwanted characters and formatting. """ text = re.sub(r'\s+', ' ', text) text = text.strip() return text def generate_questions(summary, num_questions=3): """ Generates questions based on the summarized text using a question-generation pipeline. """ question_generator = pipeline('e2e-qg') questions = question_generator(summary, max_questions=num_questions) return questions def strip_md(text): """ Removes markdown formatting from the text. """ text = text.replace("**", "").replace("*", "").replace("#", "") text = re.sub(r'([!*_=~-])', r'\\\1', text) return text def create_document(): """ Creates a new Word document with a predefined heading. 
""" doc = Document() doc.add_heading("Business Proposal", 0) return doc def add_section_to_doc(doc, section_name, section_content): """ Adds a new section with the given name and content to the Word document. """ section_content = strip_md(section_content) section_content = section_content.replace("\\", "") # Remove backslashes doc.add_heading(section_name, level=1) doc.add_paragraph(section_content) return doc def get_docx_bytes(doc): """ Converts the Word document to bytes for downloading. """ doc_io = io.BytesIO() doc.save(doc_io) doc_io.seek(0) return doc_io to app_url_problem.py
472e45b
verified
| import streamlit as st | |
| from docx import Document | |
| import re | |
| import io | |
| import os | |
| from fpdf import FPDF | |
| from groq import Groq | |
| from exa_py import Exa | |
| from dotenv import load_dotenv | |
| from retrying import retry | |
| from business_plan_functions import * | |
| import json | |
# Pull settings from the local .env file into the process environment
# before any API key is looked up below.
load_dotenv()

# Exa search client; the key is read from the EXA_API_KEY variable.
exa = Exa(api_key=os.environ.get("EXA_API_KEY"))

# Groq client (key read from GROQ_API_KEY) and the model identifier string
# kept alongside it.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"
def strip_md(text):
    """Drop Markdown emphasis/heading markers and escape leftover symbols.

    Every "**", "*" and "#" is removed from ``text``. Afterwards each
    remaining character from the set ``! * _ = ~ -`` is prefixed with a
    single backslash.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string with the remaining special characters escaped.
    """
    # Order matters only for readability: "**" first, then any lone "*".
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")

    escaped = re.sub(r'([!*_=~-])', r'\\\1', text)
    return escaped
def create_document():
    """Start a new Word document titled "Business Proposal".

    Returns:
        The freshly created ``Document`` with a level-0 heading already
        added.
    """
    document = Document()
    document.add_heading("Business Proposal", level=0)
    return document
def add_section_to_doc(doc, section_name, section_content):
    """Append one headed section to ``doc`` and hand the document back.

    The content is passed through ``strip_md`` and then every backslash is
    removed from the result before it is written as a paragraph.

    Args:
        doc: Document object exposing ``add_heading`` and ``add_paragraph``.
        section_name: Text for the level-1 heading.
        section_content: Body text for the section.

    Returns:
        The same ``doc`` object that was passed in.
    """
    # Remove backslashes
    body_text = strip_md(section_content).replace("\\", "")

    doc.add_heading(section_name, level=1)
    doc.add_paragraph(body_text)
    return doc
def get_docx_bytes(doc):
    """Save ``doc`` into an in-memory buffer positioned at its start.

    Args:
        doc: Object with a ``save(stream)`` method that writes to a binary
            stream.

    Returns:
        An ``io.BytesIO`` holding whatever ``doc.save`` wrote, rewound to
        offset 0 so it can be read from the beginning.
    """
    buffer = io.BytesIO()
    doc.save(buffer)
    # Rewind so a consumer reading the buffer starts at the first byte.
    buffer.seek(0)
    return buffer
def collect_basic_info():
    """Render the proposal generator page and build the proposal on submit.

    Shows a title, a URL text input and a Submit button. Once Submit is
    pressed, ``collect_business_info(url)`` is called; if it does not
    return a non-empty dict an error is displayed and nothing else happens.
    Otherwise, for each (section name, prompt builder) pair the builder is
    called with the collected data, its result is sent to ``call_llm``, and
    the reply is displayed, appended to the Word document and offered
    through a download button. Each button serves the document as it stands
    after that section was appended, so it also contains all earlier
    sections.

    NOTE(review): ``collect_business_info``, ``call_llm`` and the prompt
    builders are not defined in the visible part of this file; presumably
    they come from the ``business_plan_functions`` star import - confirm.
    """
    st.title("Business Proposal Generator")

    # Get the URL from the user
    url = st.text_input("Enter the URL of the company's website")

    # Nothing further is rendered until the user presses Submit.
    if not st.button('Submit'):
        return

    # Collect data from the provided URL
    data = collect_business_info(url)
    if not data or not isinstance(data, dict):
        st.error("Failed to collect business information from the provided URL.")
        return

    # Create a new document
    doc = create_document()

    # Section headings paired with the callable that builds each prompt,
    # in the order they appear in the proposal.
    sections_to_process = [
        ("Executive Summary", generate_executive_summary),
        ("Mission Statement", generate_mission),
        ("Vision Statement", generate_vision),
        ("Objectives", generate_objectives),
        ("Core Values", generate_core_values),
        ("Business Description Analysis", generate_business_description),
        ("Company Location", generate_company_location),
        ("Products", generate_products),
        ("Ownership", generate_ownership),
        ("Company Structure", generate_company_structure),
        ("Management Profiles", generate_management_profiles),
        ("Operational Strategy", generate_operational_strategy),
        ("Marketing Mix Strategy", generate_marketing_mix),
        ("Promotional Strategy", generate_promotional_strategy),
        ("Market Demand Analysis", analyze_demand),
        ("Market Segment Analysis", segment_market),
        ("Competitor Analysis", analyze_competitors),
        ("Porter's Five Forces Analysis", perform_porters_five_forces),
        ("Industry Analysis", analyze_industry_accommodation),
        ("Major Player Analysis", list_major_players),
        ("Business Sub Sector Analysis", analyze_business_sub_sector),
        ("SWOT Analysis", generate_swot_analysis),
        ("Funding Request", generate_funding_request),
        ("Financing & Bank Loan Amortization", create_financing_plan),
        ("Income Statement Analysis", generate_pro_forma_income_statement),
        ("Revenue Expense Analysis", predict_revenue_expenses),
        ("Monthly Cash Flow Analysis", generate_monthly_cash_flow),
        ("Pro Forma Annual Cash Flow Analysis", generate_pro_forma_annual_cash_flow),
        ("Pro Forma Balance Sheet Analysis", generate_pro_forma_balance_sheet),
        ("Break-Even Analysis", perform_break_even_analysis),
        ("Payback Period Analysis", calculate_payback_period),
        ("Financial Graphs Analysis", generate_financial_graphs),
        ("Risk Mitigations Analysis", identify_risks_mitigations),
    ]

    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    for section_name, generate_prompt_func in sections_to_process:
        section_content = call_llm(generate_prompt_func(data))

        st.subheader(section_name)
        st.write(section_content)

        # Update document and create download link
        doc = add_section_to_doc(doc, section_name, section_content)
        st.download_button(
            label=f"Download {section_name} as DOCX",
            data=get_docx_bytes(doc),
            file_name=f"{section_name.replace(' ', '_').lower()}.docx",
            mime=docx_mime,
        )
# Render the page only when this file is the entry script, not when it is
# imported by another module.
if __name__ == "__main__":
    collect_basic_info()