pitch_deck / app.py
Ephraimmm's picture
Update app.py
e321da8 verified
# -*- coding: utf-8 -*-
"""Pitch_deck
# A full business solution
## Now we will take our project from Day 1 to the next level
### BUSINESS CHALLENGE:
Create a product that builds a pitch deck for a company to be used for prospective clients, investors and potential recruits.
"""
# Import the required libraries
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI
# Initialize and constants
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Report whether the key has the expected shape; this is informational only
# and never stops the script.
print(
    "API key looks good so far"
    if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10
    else "There might be a problem with your API key? Please visit the troubleshooting notebook!"
)

# Model name and client shared by every chat-completion call in this file.
MODEL = 'gpt-4o-mini'
openai = OpenAI()

# Some websites need you to use proper headers when fetching them:
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}
class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes set by the constructor:
        url:   the address that was requested.
        body:  the raw response content.
        title: text of the <title> tag, or "No title found".
        text:  text of <body> with script/style/img/input elements removed,
               fragments separated by newlines ("" when there is no <body>).
        links: every non-empty href found on an <a> tag, exactly as written
               in the page.
    """

    def __init__(self, url, timeout=10):
        """
        Download ``url`` and extract its title, text and links.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server. Without a timeout
                ``requests.get`` can block indefinitely on a stalled host.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # ``soup.title.string`` is None when <title> is empty or contains
        # nested tags, so fall back to the placeholder in that case as well.
        page_title = soup.title.string if soup.title else None
        self.title = page_title or "No title found"

        if soup.body:
            # Drop elements that carry no readable content before extracting text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = [anchor.get('href') for anchor in soup.find_all('a')]
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text formatted as a prompt fragment."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
# System prompt for picking the marketing-relevant links of a website.
# The example is valid JSON so that it matches the JSON reply being requested.
link_system_prompted = (
    "You are to choose the links that are relevant in terms of marketing in this website "
    "and then you are to follow through with picking them out. "
    "Do not choose privacy link, and any other irrelevant ones\n"
)
link_system_prompted += "Respond in JSON.\n"
link_system_prompted += """Follow this example:
{"links": [{"type": "about us", "url": "https://something/something"}]}
"""
def link_user_prompted(website):
    """
    Build the user message asking which of a page's links are relevant.

    Args:
        website: object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        str: the instruction text followed by the links, one per line.
    """
    prompt = f"I would like to know the relevant link in the {website.url} - "
    # The trailing newline keeps the first link from being glued onto "emails".
    prompt += "Please decide which of the links is more relevant and also ignore privacy policy and emails\n"
    prompt += "\n".join(website.links)
    return prompt
def get_stuff(Url):
    """
    Ask the model which links on a page are relevant for marketing.

    Args:
        Url: address of the page whose links should be classified.

    Returns:
        The model's reply parsed from JSON.

    Raises:
        json.JSONDecodeError: if the reply is not valid JSON.
    """
    # Scrape the page first; the user prompt is built from its links.
    website = Website(Url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            # The system role carries the instruction text, not the prompt builder.
            {"role": "system", "content": link_system_prompted},
            {"role": "user", "content": link_user_prompted(website)},
        ],
        response_format={"type": "json_object"},
    )
    result = response.choices[0].message.content
    return json.loads(result)
"""## First step: Have GPT-4o-mini figure out which links are relevant
"""
# System prompt for the brochure link selector. The example must itself be
# valid JSON, because it is the format the model is told to imitate.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
def get_links_user_prompt(website):
    """
    Compose the user message that lists a page's links for the link selector.

    Args:
        website: object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        str: the request text, a heading line, then the links one per line.
    """
    request = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join((request, heading, link_lines))
def get_links(url):
    """
    Scrape ``url`` and ask the model which of its links suit a brochure.

    Args:
        url: address of the page to scrape.

    Returns:
        The model's JSON reply, parsed with ``json.loads``.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)
# sample
"""## Second step: make the pitch deck!
Assemble all the details into another prompt to GPT-4o-mini
"""
def get_all_details(url):
    """
    Collect the landing page plus every model-selected page into one text blob.

    Args:
        url: address of the company's landing page.

    Returns:
        str: "Landing page:" followed by the landing page contents, then one
        section per selected link (its type as a heading, then its contents).

    Raises:
        requests.RequestException: if the landing page itself cannot be fetched.
    """
    from urllib.parse import urljoin  # local import; only this function needs it

    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)

    # The link list is model output, so tolerate a missing key or odd entries
    # instead of raising KeyError/TypeError halfway through.
    for link in links.get("links") or []:
        link_url = link.get("url") if isinstance(link, dict) else None
        if not link_url:
            continue
        # The model is asked for absolute URLs but may echo a relative href.
        link_url = urljoin(url, link_url)
        try:
            page = Website(link_url)
        except requests.RequestException as err:
            # One unreachable sub-page should not discard everything gathered so far.
            print(f"Skipping {link_url}: {err}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result
## system prompt updates
# Earlier variant, kept for reference:
# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short pitch deck for the company, so the company can use it to apply for more jobs and clients, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

# Active system prompt for the pitch-deck generation calls.
# A backslash at the end of a line joins it to the next one with nothing in
# between, so each continued line ends with a space to keep the words apart.
system_prompt = """
You are a business analyst specializing in creating compelling pitch decks from company website content. Respond in markdown. \
Your task is to analyze multiple relevant pages from a company's website and synthesize the information into a concise, \
professional pitch deck that the company can use for business development, investor relations.
Your pitch deck should be formatted in markdown and include the following sections when information is available:
- Company overview and mission
- Products/services and value proposition
- Target market and customer base
- Company culture and values
- Growth potential and achievements
- Contact information
- End with We are always happy to help you with (list the services again)
Focus on creating a narrative that highlights the company's strengths, unique positioning, and opportunities for partnership, \
investment. Ensure the content is professional, engaging, and suitable for multiple audiences including potential clients and investors."""

# Humorous variant, kept for reference:
# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."
#end
def get_pitch_user_prompt(company_name, url):
    """
    Build the user message for the pitch-deck request.

    Args:
        company_name: name of the company, inserted into the first line.
        url: landing page address whose scraped details are appended.

    Returns:
        str: the assembled prompt, cut to at most 5,000 characters.
    """
    max_chars = 5_000  # Truncate if more than 5,000 characters
    sections = (
        f"You are looking at a company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a short pitch deck for the company, of the company in markdown.\n",
        get_all_details(url),
    )
    return "".join(sections)[:max_chars]
def pitch_deck(company_name, url):
    """
    Generate a pitch deck in a single request and render it as Markdown.

    Args:
        company_name: name of the company to describe.
        url: landing page address of the company's website.

    Returns:
        None. The result is shown through IPython's ``display``.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_pitch_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    deck = completion.choices[0].message.content
    display(Markdown(deck))
"""## Finally - a minor improvement
With a small adjustment, we can change this so that the results stream back from OpenAI,
with the familiar typewriter animation
"""
def stream_pitchdeck(company_name, url):
    """
    Generate a pitch deck and render it incrementally as the reply streams in.

    Args:
        company_name: name of the company to describe.
        url: landing page address of the company's website.

    Returns:
        None. One IPython display is created and then updated per chunk.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_pitch_user_prompt(company_name, url)},
    ]
    chunks = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        stream=True,
    )

    deck_so_far = ""
    handle = display(Markdown(""), display_id=True)
    for chunk in chunks:
        piece = chunk.choices[0].delta.content or ''
        # Remove code-fence markers and the word "markdown" from the
        # accumulated text before it is rendered.
        deck_so_far = (deck_so_far + piece).replace("```", "").replace("markdown", "")
        update_display(Markdown(deck_so_far), display_id=handle.display_id)
#gradio
import gradio as gr
def stream_pitchdeck2(company_name, url):
    """
    Gradio handler that streams a pitch deck for the given company.

    This function is a generator, so a value passed to ``return`` is never
    delivered to the caller iterating it. Every user-facing message,
    including validation and error text, is therefore yielded.

    Args:
        company_name: name of the company to describe.
        url: landing page address of the company's website.

    Yields:
        str: the deck accumulated so far, or a single explanatory message
        when the inputs are missing or a step fails.
    """
    if not company_name or not url:
        yield "Please provide both company name and URL.URL should start with https://"
        return

    # Scraping and link selection can fail (bad URL, network error, malformed
    # model reply); report that in the UI instead of raising out of the handler.
    try:
        user_prompt = get_pitch_user_prompt(company_name, url)
    except Exception as e:
        yield f"Could not retrieve website details to generate pitch deck. ({e})"
        return

    # Kept from the original: pass this sentinel through if it is ever returned.
    if user_prompt == "Could not retrieve website details to generate pitch deck.":
        yield user_prompt
        return

    try:
        stream = openai.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            stream=True,
        )
        response = ""
        for chunk in stream:
            response += chunk.choices[0].delta.content or ''
            # Remove code-fence markers and the word "markdown" before display.
            response = response.replace("```", "").replace("markdown", "")
            yield response  # Use yield for Gradio streaming
    except Exception as e:
        yield f"An error occurred during OpenAI API call: {e}"
# Create the Gradio interface: two text inputs feed stream_pitchdeck2 and its
# output is rendered as Markdown.
iface2 = gr.Interface(
    title="Company Pitch Deck Generator",
    description="Enter the company name and website URL to generate a pitch deck based on the website content. URL should start with https://",
    fn=stream_pitchdeck2,
    inputs=[
        gr.Textbox(value="Diamond Adverts", label="Company Name"),
        gr.Textbox(value="https://www.diamondadverts.com", label="Company Website URL"),
    ],
    outputs=gr.Markdown(label="Generated Pitch Deck"),
)

# Start the app.
iface2.launch(share=True, debug=True)
# import gradio as gr
# #create the Gradio interface
# iface = gr.Interface(
# fn=stream_pitchdeck,
# inputs=[
# gr.Textbox(label="Company Name"),
# gr.Textbox(label="Company Website URL")
# ],
# outputs=gr.Markdown(label="Generated Pitch Deck"),
# title="Company Pitch Deck Generator",
# description="Enter the company name and website URL to generate a pitch deck based on the website content."
# )
# # Launch the Gradio app
# iface.launch()