# -*- coding: utf-8 -*-
"""Pitch_deck

# A full business solution

## Now we will take our project from Day 1 to the next level

### BUSINESS CHALLENGE:

Create a product that builds a pitch deck for a company to be used for prospective clients,
investors and potential recruits.
"""

# import the important
import os
import requests
import json
from typing import List
from urllib.parse import urljoin
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

# Initialize and constants
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")

MODEL = 'gpt-4o-mini'
openai = OpenAI()

# Seconds to wait for a web server before giving up; without a timeout
# requests.get() can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30

# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        """Fetch *url* and extract its title, visible body text and links.

        Raises:
            requests.RequestException: if the page cannot be fetched
                (including when the server does not answer within
                REQUEST_TIMEOUT seconds).
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # soup.title.string is None for an empty <title> or one with nested
        # tags, so fall back to the placeholder in that case as well.
        if soup.title and soup.title.string:
            self.title = soup.title.string
        else:
            self.title = "No title found"
        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        # Anchors without an href yield None; keep only real values.
        self.links = [link for link in links if link]

    def get_contents(self):
        """Return the page title and text formatted for inclusion in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"


link_system_prompted = (
    "You are to choose the links that are relevant in terms of marketing in this website and then "
    "and then you are to follow through with picking them out. "
    "Do not choose privacy link, and anyother irrelevant ones\n"
)
link_system_prompted += "Respond in Json\n"
# The example must be valid JSON: the request below asks the API for a JSON
# object, and a malformed example invites a malformed answer.
link_system_prompted += """Follow this example:
{"links": [{"type": "about us", "url": "https://something/something"}]}"""


def link_user_prompted(website):
    """Build the user prompt listing the links found on *website*.

    Args:
        website: a Website instance; its ``url`` and ``links`` are used.

    Returns:
        The prompt text, with one link per line after the instructions.
    """
    User_prompter = f"I would like to know the relevant link in the {website.url} - "
    User_prompter += "Please decide which of the links is more relevant and also ignore privacy policy and emails"
    # Separate the instructions from the first link; without the newline the
    # first URL was glued onto the word "emails".
    User_prompter += "\n"
    User_prompter += "\n".join(website.links)
    return User_prompter


def get_stuff(Url):
    """Ask the model which links on the page at *Url* are marketing-relevant.

    Args:
        Url: address of the page to scrape.

    Returns:
        The model's JSON answer parsed with ``json.loads``.
    """
    website = Website(Url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompted},
            {"role": "user", "content": link_user_prompted(website)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)


# ## First step: Have GPT-4o-mini figure out which links are relevant

link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""


def get_links_user_prompt(website):
    """Build the user prompt asking the model to pick brochure-relevant links.

    Args:
        website: a Website instance; its ``url`` and ``links`` are used.

    Returns:
        The prompt text, ending with the page's links one per line.
    """
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt


def get_links(url):
    """Scrape *url* and ask the model which of its links are relevant.

    Returns:
        The model's JSON answer parsed with ``json.loads``; the system prompt
        asks for a ``"links"`` list of ``{"type", "url"}`` objects.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)


# sample

# ## Second step: make the pitch deck!
#
# Assemble all the details into another prompt to GPT4-o


def get_all_details(url):
    """Collect the landing page and every relevant linked page as one text.

    Args:
        url: address of the company's landing page.

    Returns:
        The landing-page contents followed by one section per linked page
        that could be fetched.
    """
    parts = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url)
    print("Found links:", links)
    for link in links.get("links", []):
        target = link.get("url")
        if not target:
            # The model omitted the URL for this entry; nothing to fetch.
            continue
        try:
            # The page may list relative links and the model may echo them
            # back unchanged, so resolve against the landing page URL
            # (absolute URLs pass through urljoin untouched).
            page = Website(urljoin(url, target))
        except requests.RequestException as exc:
            # One unreachable secondary page should not abort the whole deck.
            print(f"Skipping {target}: {exc}")
            continue
        parts.append(f"\n\n{link.get('type', 'page')}\n")
        parts.append(page.get_contents())
    return "".join(parts)


## system prompt updates

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short pitch deck for the company, so the comapany can use it to apply for mor jobs and clients, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

system_prompt = """
You are a business analyst specializing in creating compelling pitch decks from company website content. Respond in markdown.\
Your task is to analyze multiple relevant pages from a company's website and synthesize the information into a concise, \
professional pitch deck that the company can use for business development, investor relations.

Your pitch deck should be formatted in markdown and include the following sections when information is available:
- Company overview and mission
- Products/services and value proposition
- Target market and customer base
- Company culture and values
- Growth potential and achievements
- Contact information
- End with We are always happy to help you with (list the services again)

Focus on creating a narrative that highlights the company's strengths, unique positioning, and opportunities for partnership,\
investment. Ensure the content is professional, engaging, and suitable for multiple audiences including potential clients and investors."""

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."
#end


def _strip_markdown_fences(text):
    """Remove Markdown code-fence markers so the deck renders as Markdown.

    Only the fence itself (three backticks, optionally tagged ``markdown``)
    is removed; the word "markdown" elsewhere in the text is left alone.
    """
    return text.replace("```markdown", "").replace("```", "")


def get_pitch_user_prompt(company_name, url):
    """Build the user prompt for the pitch deck from the scraped site.

    Args:
        company_name: name of the company, quoted in the prompt.
        url: landing page address passed to ``get_all_details``.

    Returns:
        The prompt text, cut to at most 5,000 characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short pitch deck for the company, of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000]  # Truncate if more than 5,000 characters
    return user_prompt


def pitch_deck(company_name, url):
    """Generate the pitch deck in one request and display it as Markdown."""
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_pitch_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))


# ## Finally - a minor improvement
#
# With a small adjustment, we can change this so that the results stream back from OpenAI,
# with the familiar typewriter animation


def stream_pitchdeck(company_name, url):
    """Stream the pitch deck from the model, updating one display in place."""
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_pitch_user_prompt(company_name, url)}
        ],
        stream=True
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        if not chunk.choices:
            # A chunk without choices carries no text to append.
            continue
        raw += chunk.choices[0].delta.content or ''
        # Clean the full accumulated text each time so a fence split across
        # two chunks is still removed once it is complete.
        update_display(Markdown(_strip_markdown_fences(raw)), display_id=display_handle.display_id)


#gradio

import gradio as gr


def stream_pitchdeck2(company_name, url):
    """Stream the pitch deck as successive Markdown strings for Gradio.

    This is a generator: every message, including validation and error
    messages, is yielded, because a value passed to ``return`` inside a
    generator is never delivered to the consumer iterating over it.

    Args:
        company_name: name of the company.
        url: landing page address of the company website.

    Yields:
        The text generated so far, or a single error message.
    """
    if not company_name or not url:
        yield "Please provide both company name and URL.URL should start with https://"
        return

    try:
        user_prompt = get_pitch_user_prompt(company_name, url)
    except Exception as e:
        # UI boundary: report scraping/link-selection failures in the output
        # panel instead of letting them escape the handler.
        yield f"Could not retrieve website details to generate pitch deck. ({e})"
        return

    try:
        stream = openai.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            stream=True
        )

        raw = ""
        for chunk in stream:
            if not chunk.choices:
                # A chunk without choices carries no text to append.
                continue
            raw += chunk.choices[0].delta.content or ''
            yield _strip_markdown_fences(raw)  # Use yield for Gradio streaming
    except Exception as e:
        yield f"An error occurred during OpenAI API call: {e}"


# Create the Gradio interface
iface2 = gr.Interface(
    fn=stream_pitchdeck2,
    inputs=[
        gr.Textbox(label="Company Name", value="Diamond Adverts"),
        gr.Textbox(label="Company Website URL", value="https://www.diamondadverts.com")
    ],
    outputs=gr.Markdown(label="Generated Pitch Deck"),
    title="Company Pitch Deck Generator",
    description="Enter the company name and website URL to generate a pitch deck based on the website content. URL should start with https://"
)

iface2.launch(debug=True, share=True)

# import gradio as gr

# #create the Gradio interface
# iface = gr.Interface(
#     fn=stream_pitchdeck,
#     inputs=[
#         gr.Textbox(label="Company Name"),
#         gr.Textbox(label="Company Website URL")
#     ],
#     outputs=gr.Markdown(label="Generated Pitch Deck"),
#     title="Company Pitch Deck Generator",
#     description="Enter the company name and website URL to generate a pitch deck based on the website content."
# )

# # Launch the Gradio app
# iface.launch()