Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """Pitch_deck | |
| # A full business solution | |
| ## Now we will take our project from Day 1 to the next level | |
| ### BUSINESS CHALLENGE: | |
| Create a product that builds a pitch deck for a company to be used for prospective clients, investors and potential recruits. | |
| """ | |
| # import the important | |
| import os | |
| import requests | |
| import json | |
| from typing import List | |
| from dotenv import load_dotenv | |
| from bs4 import BeautifulSoup | |
| from IPython.display import Markdown, display, update_display | |
| from openai import OpenAI | |
# Initialize and constants
load_dotenv(override=True)

api_key = os.getenv('OPENAI_API_KEY')
# Shape check only (project keys start with "sk-proj-"); the key itself is never printed.
key_looks_valid = bool(api_key) and api_key.startswith('sk-proj-') and len(api_key) > 10
if key_looks_valid:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")

MODEL = 'gpt-4o-mini'
openai = OpenAI()

# Some websites need you to use proper headers when fetching them:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}
class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        # Fetch once; keep the raw bytes and derive title, visible text, and links.
        self.url = url
        self.body = requests.get(url, headers=headers).content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            # Drop non-content tags before extracting the visible text.
            for tag in soup.body(["script", "style", "img", "input"]):
                tag.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        # Keep only anchors that actually carry an href.
        self.links = [href for href in (a.get('href') for a in soup.find_all('a')) if href]

    def get_contents(self):
        """Return the page title and visible text as a prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
# NOTE(review): this draft prompt appears unused in favour of link_system_prompt
# below; typos fixed and the JSON example made valid in case it is revived.
link_system_prompted = "You are to choose the links that are relevant in terms of marketing in this website \
and then you are to follow through with picking them out. Do not choose the privacy link, or any other irrelevant ones.\n"
link_system_prompted += "Respond in JSON. "
link_system_prompted += """Follow this example:
{"links": [{"type": "about us", "url": "https://something/something"}]}"""
def link_user_prompted(website):
    """Build the user prompt asking the model to pick relevant links for *website*.

    website: an object exposing ``.url`` (str) and ``.links`` (list[str]).
    Returns the prompt string with one link per line.
    """
    user_prompter = f"I would like to know the relevant link in the {website.url} - "
    user_prompter += "Please decide which of the links is more relevant and also ignore privacy policy and emails"
    # Bug fix: a newline is needed BEFORE the link list, otherwise the first
    # link is glued onto the end of the sentence above.
    user_prompter += "\n" + "\n".join(website.links)
    return user_prompter
def get_stuff(Url):
    """Scrape *Url*, ask the model to pick the marketing-relevant links, and
    return the parsed JSON response.

    Fixes over the previous version:
    - ``Website.url`` never constructed a page; we must instantiate ``Website(Url)``.
    - ``openai.chat.completion`` is not an attribute; the API is ``chat.completions``.
    - ``Model`` was an undefined name; the module constant is ``MODEL``.
    - The system message was the prompt *function object*; it should be the
      system prompt string, with the function output as the user message.
    """
    website = Website(Url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompted},
            {"role": "user", "content": link_user_prompted(website)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)
| """## First step: Have GPT-4o-mini figure out which links are relevant | |
| """ | |
# System prompt for the link-selection call. The example MUST be valid JSON,
# since the model is asked to imitate it (the old example had a colon instead
# of a comma after "careers page").
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
def get_links_user_prompt(website):
    """Compose the user prompt listing every link found on *website*.

    website: an object exposing ``.url`` (str) and ``.links`` (list[str]).
    """
    parts = [
        f"Here is the list of links on the website of {website.url} - ",
        # Bug fix: a newline is needed before the instruction line so it is not
        # glued onto the end of the URL sentence, and the links list needs a
        # leading newline separator for the same reason.
        "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.\n",
        "Links (some might be relative links):\n",
        "\n".join(website.links),
    ]
    return "".join(parts)
def get_links(url):
    """Scrape *url* and ask the model which of its links belong in a brochure.

    Returns the model's JSON response parsed into a dict of the form
    {"links": [{"type": ..., "url": ...}, ...]}.
    """
    site = Website(url)
    completion = openai.chat.completions.create(
        model=MODEL,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(site)},
        ],
    )
    return json.loads(completion.choices[0].message.content)
| # sample | |
| """## Second step: make the pitch deck! | |
| Assemble all the details into another prompt to GPT4-o | |
| """ | |
def get_all_details(url):
    """Collect the landing page plus every model-selected subpage as one text blob.

    Prints the link selection as a side effect (useful progress feedback in a
    notebook); each subpage is fetched and appended under its link type.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        sections.append(f"\n\n{link['type']}\n")
        sections.append(Website(link["url"]).get_contents())
    return "".join(sections)
## system prompt updates
# Bug fix: inside a triple-quoted string a trailing backslash removes the
# newline WITHOUT inserting a space, so the old prompt contained glued words
# ("markdown.Your", "partnership,investment"). Spaces are now explicit before
# each continuation. The older brochure/humorous draft prompts were removed
# as dead commented-out code.
system_prompt = """
You are a business analyst specializing in creating compelling pitch decks from company website content. Respond in markdown. \
Your task is to analyze multiple relevant pages from a company's website and synthesize the information into a concise, \
professional pitch deck that the company can use for business development, investor relations.
Your pitch deck should be formatted in markdown and include the following sections when information is available:
- Company overview and mission
- Products/services and value proposition
- Target market and customer base
- Company culture and values
- Growth potential and achievements
- Contact information
- End with We are always happy to help you with (list the services again)
Focus on creating a narrative that highlights the company's strengths, unique positioning, and opportunities for partnership, \
investment. Ensure the content is professional, engaging, and suitable for multiple audiences including potential clients and investors."""
def get_pitch_user_prompt(company_name, url):
    """Build the full user prompt: company intro plus scraped page contents.

    company_name: display name used in the prompt header.
    url: landing-page URL passed to get_all_details for scraping.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    # Fix: the second line had a spurious f-prefix with no placeholders.
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short pitch deck for the company, of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Keep the prompt within budget: truncate if more than 5,000 characters.
    return user_prompt[:5_000]
def pitch_deck(company_name, url):
    """Generate the pitch deck in a single (non-streaming) call and render it
    as Markdown in the notebook."""
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_pitch_user_prompt(company_name, url)},
        ],
    )
    markdown_text = completion.choices[0].message.content
    display(Markdown(markdown_text))
| """## Finally - a minor improvement | |
| With a small adjustment, we can change this so that the results stream back from OpenAI, | |
| with the familiar typewriter animation | |
| """ | |
def stream_pitchdeck(company_name, url):
    """Stream the pitch deck from OpenAI, updating a live Markdown display for
    the familiar typewriter effect."""
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_pitch_user_prompt(company_name, url)},
        ],
        stream=True,
    )
    handle = display(Markdown(""), display_id=True)
    accumulated = ""
    for chunk in stream:
        accumulated += chunk.choices[0].delta.content or ''
        # Strip code fences so partial markdown renders cleanly mid-stream.
        accumulated = accumulated.replace("```", "").replace("markdown", "")
        update_display(Markdown(accumulated), display_id=handle.display_id)
| #gradio | |
| import gradio as gr | |
def stream_pitchdeck2(company_name, url):
    """Gradio handler: stream the pitch deck, yielding progressively longer text.

    Bug fix: this function contains ``yield`` and is therefore a generator, so
    the old ``return "<message>"`` statements silently discarded the error
    messages (a generator's return value is only attached to StopIteration).
    All user-facing messages must be *yielded* to reach the Gradio UI.
    """
    if not company_name or not url:
        yield "Please provide both company name and URL. URL should start with https://"
        return
    user_prompt = get_pitch_user_prompt(company_name, url)
    # NOTE(review): get_pitch_user_prompt never returns this sentinel today;
    # check kept in case the helper is extended to report fetch failures.
    if user_prompt == "Could not retrieve website details to generate pitch deck.":
        yield user_prompt
        return
    try:
        stream = openai.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            stream=True
        )
        response = ""
        for chunk in stream:
            response += chunk.choices[0].delta.content or ''
            # Strip code fences so partial markdown renders cleanly mid-stream.
            response = response.replace("```", "").replace("markdown", "")
            yield response  # Use yield for Gradio streaming
    except Exception as e:
        yield f"An error occurred during OpenAI API call: {e}"
# Create the Gradio interface
deck_inputs = [
    gr.Textbox(label="Company Name", value="Diamond Adverts"),
    gr.Textbox(label="Company Website URL", value="https://www.diamondadverts.com"),
]
iface2 = gr.Interface(
    fn=stream_pitchdeck2,
    inputs=deck_inputs,
    outputs=gr.Markdown(label="Generated Pitch Deck"),
    title="Company Pitch Deck Generator",
    description="Enter the company name and website URL to generate a pitch deck based on the website content. URL should start with https://",
)
# share=True publishes a temporary public link; debug=True surfaces tracebacks.
iface2.launch(debug=True, share=True)
| # import gradio as gr | |
| # #create the Gradio interface | |
| # iface = gr.Interface( | |
| # fn=stream_pitchdeck, | |
| # inputs=[ | |
| # gr.Textbox(label="Company Name"), | |
| # gr.Textbox(label="Company Website URL") | |
| # ], | |
| # outputs=gr.Markdown(label="Generated Pitch Deck"), | |
| # title="Company Pitch Deck Generator", | |
| # description="Enter the company name and website URL to generate a pitch deck based on the website content." | |
| # ) | |
| # # Launch the Gradio app | |
| # iface.launch() |