Ephraimmm commited on
Commit
3f9098e
·
verified ·
1 Parent(s): 1dd17ea

Upload pitch_deck.py

Browse files
Files changed (1) hide show
  1. pitch_deck.py +383 -0
pitch_deck.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Pitch_deck
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1ryLKlHjf3azJ4gp_klTOF2zhSOn01cSH
8
+
9
+ # A full business solution
10
+
11
+ ## Now we will take our project from Day 1 to the next level
12
+
13
+ ### BUSINESS CHALLENGE:
14
+
15
+ Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits.
16
+
17
+ We will be provided a company name and their primary website.
18
+
19
+ See the end of this notebook for examples of real-world business applications.
20
+
21
+ And remember: I'm always available if you have problems or ideas! Please do reach out.
22
+ """
23
+
24
+ # !pip install dotenv
25
+ # !pip install openai
26
+ # !pip install bs4
27
+ # !pip install requests
28
+ # !pip install python-dotenv
29
+ # !pip install IPython
30
+
31
+ # Import the required libraries
32
+
33
+ import os
34
+ import requests
35
+ import json
36
+ from typing import List
37
+ from dotenv import load_dotenv
38
+ from bs4 import BeautifulSoup
39
+ from IPython.display import Markdown, display, update_display
40
+ from openai import OpenAI
41
+
42
# Initialize the OpenAI client.
# Inside Colab the keys come from the notebook's `userdata` secret store; when
# `google.colab` is not importable (plain script, hosted app) they are read
# from environment variables of the same names instead of crashing on import.
try:
    from google.colab import userdata
except ImportError:
    open_ai_key = os.getenv('OPENAI_API_KEY')
    grok_key = os.getenv('XAI_API_KEY')
else:
    open_ai_key = userdata.get('OPENAI_API_KEY')
    grok_key = userdata.get('XAI_API_KEY')

# NOTE(review): grok_key is not referenced anywhere in the visible part of this file.
openai = OpenAI(api_key=open_ai_key)
MODEL = 'gpt-4o-mini'
48
+
49
+ # Initialize and constants
50
+
51
+ # load_dotenv(override=True)
52
+ # api_key = os.getenv('OPENAI_API_KEY')
53
+
54
+ # if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
55
+ # print("API key looks good so far")
56
+ # else:
57
+ # print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
58
+
59
+ # MODEL = 'gpt-4o-mini'
60
+ # openai = OpenAI()
61
+
62
+ # A class to represent a Webpage
63
+
64
+ # Some websites need you to use proper headers when fetching them:
65
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


class Website:
    """
    A utility class to represent a Website that we have scraped, now with links

    Attributes set by the constructor:
        url:   the URL that was requested
        body:  raw response bytes
        title: text of the <title> tag, or "No title found"
        text:  visible text of <body> with script/style/img/input removed
        links: every non-empty href found on an <a> tag (may be relative)
    """

    def __init__(self, url, timeout=30):
        """
        Fetch `url` and parse it.

        Args:
            url: page to download.
            timeout: seconds to wait for the server before requests raises;
                without it a stalled server would block the caller forever.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # soup.title.string is None for an empty <title> or one with nested
        # tags; fall back so the prompt never contains the literal "None".
        title = soup.title.string if soup.title else None
        self.title = title or "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        """Return the page title and text formatted as one prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
91
+
92
+ # ted = Website("https://invoice.2go.com")
93
+ # ted.links
94
+
95
# System prompt for picking the marketing-relevant links of a website.
# Each sentence ends with its own separator so the concatenated pieces do not
# run into each other, and the example is valid JSON because the model is
# asked to answer in JSON.
link_system_prompted = (
    "You are to choose the links that are relevant in terms of marketing in this website "
    "and then you are to follow through with picking them out. "
    "Do not choose privacy links or any other irrelevant ones.\n"
)
link_system_prompted += "Respond in JSON.\n"
link_system_prompted += """Follow this example:
{
    "links": [
        {"type": "about us", "url": "https://something/something"}
    ]
}
"""
102
+
103
def link_user_prompted(website):
    """Build the user prompt listing a website's links for relevance filtering.

    Args:
        website: object exposing `url` (str) and `links` (iterable of str).

    Returns:
        The instruction sentence followed by one link per line.
    """
    user_prompter = f"I would like to know the relevant link in the {website.url} - "
    # The trailing newline keeps the first link from being glued to "emails".
    user_prompter += "Please decide which of the links is more relevant and also ignore privacy policy and emails\n"
    user_prompter += "\n".join(website.links)
    return user_prompter


# Example (needs a scraped page first, e.g. ted = Website("https://invoice.2go.com")):
# link_user_prompted(ted)
110
+
111
def get_stuff(Url):
    """Ask the model which links on the page at `Url` are marketing-relevant.

    Scrapes the page, sends its links to the chat model with the
    `link_system_prompted` system prompt, and requests a JSON object reply.

    Args:
        Url: address of the page to scrape.

    Returns:
        The model's reply parsed from JSON.
    """
    # Scrape the page; the class attribute `Website.url` does not exist.
    website = Website(Url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            # The system message is the prompt text, not the prompt-builder function.
            {"role": "system", "content": link_system_prompted},
            {"role": "user", "content": link_user_prompted(website)},
        ],
        response_format={"type": "json_object"},
    )
    result = response.choices[0].message.content
    return json.loads(result)
123
+
124
+ # ed = Website("https://squeakycleaningservices.co.uk")
125
+ # ed.links
126
+
127
+ """## First step: Have GPT-4o-mini figure out which links are relevant
128
+
129
+ ### Use a call to gpt-4o-mini to read the links on a webpage, and respond in structured JSON.
130
+ It should decide which links are relevant, and replace relative links such as "/about" with "https://company.com/about".
131
+ We will use "one shot prompting" in which we provide an example of how it should respond in the prompt.
132
+
133
+ This is an excellent use case for an LLM, because it requires nuanced understanding. Imagine trying to code this without LLMs by parsing and analyzing the webpage - it would be very hard!
134
+
135
+ Sidenote: there is a more advanced technique called "Structured Outputs" in which we require the model to respond according to a spec. We cover this technique in Week 8 during our autonomous Agentic AI project.
136
+ """
137
+
138
# System prompt for the link-selection call. The one-shot example must itself
# be valid JSON (each entry has a "type" and a "url" key), since the model is
# told to answer "as in this example".
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

print(link_system_prompt)
152
+
153
def get_links_user_prompt(website):
    """Build the user prompt that lists a website's links for the link selector.

    Args:
        website: object exposing `url` (str) and `links` (iterable of str).

    Returns:
        The instructions followed by one link per line.
    """
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt


# Example (needs a scraped page first, e.g. ed = Website("https://squeakycleaningservices.co.uk")):
# print(get_links_user_prompt(ed))
162
+
163
def get_links(url):
    """Scrape `url` and let the chat model pick the brochure-relevant links.

    Returns:
        The model's JSON-object reply, parsed with `json.loads`.
    """
    page = Website(url)
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(page)},
        ],
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)
175
+
176
+ # sample
177
+
178
+ Diamondadverts = Website("https://diamondadverts.com")
179
+ Diamondadverts.links
180
+
181
+ get_links("https://squeakycleaningservices.co.uk")
182
+
183
+
184
+
185
+ """## Second step: make the pitch deck!
186
+
187
+ Assemble all the details into another prompt to GPT4-o
188
+ """
189
+
190
def get_all_details(url):
    """Collect the landing page plus every model-selected page as prompt text.

    Args:
        url: the company's landing page.

    Returns:
        One string: the landing-page contents followed by a section per
        selected link. A link that is missing, not an absolute http(s) URL,
        or that fails to download gets a short note instead of aborting the
        whole collection.
    """
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    # The model's reply is not guaranteed to contain a "links" key.
    for link in links.get("links", []):
        link_url = link.get("url")
        result += f"\n\n{link.get('type', 'Unknown Type')}\n"
        if not link_url or not link_url.startswith(("http://", "https://")):
            result += f"Invalid URL found: {link_url}\n"
            continue
        try:
            result += Website(link_url).get_contents()
        except requests.RequestException as e:
            result += f"Could not retrieve content for {link_url}: {e}\n"
    return result
199
+
200
+ print(get_all_details("https://squeakycleaningservices.co.uk"))
201
+
202
+ # system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
203
+ # and creates a short pitch deck for the company, so the comapany can use it to apply for mor jobs and clients, investors and recruits. Respond in markdown.\
204
+ # Include details of company culture, customers and careers/jobs if you have the information."
205
+
206
# System prompt for the pitch-deck generation call.
# A backslash at the end of a line inside the string joins it to the next line
# with nothing in between, so each continued line ends with an explicit space.
system_prompt = """
You are a business analyst specializing in creating compelling pitch decks from company website content. Respond in markdown. \
Your task is to analyze multiple relevant pages from a company's website and synthesize the information into a concise, \
professional pitch deck that the company can use for business development, investor relations.

Your pitch deck should be formatted in markdown and include the following sections when information is available:
- Company overview and mission
- Products/services and value proposition
- Target market and customer base
- Company culture and values
- Growth potential and achievements
- Contact information
- End with We are always happy to help you with (list the services again)

Focus on creating a narrative that highlights the company's strengths, unique positioning, and opportunities for partnership, \
investment. Ensure the content is professional, engaging, and suitable for multiple audiences including potential clients and investors."""
222
+
223
+
224
+ # system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
225
+ # and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
226
+ # Include details of company culture, customers and careers/jobs if you have the information."
227
+
228
def get_pitch_user_prompt(company_name, url):
    """Compose the user prompt for the pitch-deck call.

    The prompt names the company, states the task, and appends the scraped
    page contents; the whole prompt is capped at 5,000 characters.
    """
    intro = f"You are looking at a company called: {company_name}\n"
    task = "Here are the contents of its landing page and other relevant pages; use this information to build a short pitch deck for the company, of the company in markdown.\n"
    full_prompt = intro + task + get_all_details(url)
    return full_prompt[:5_000]  # Truncate if more than 5,000 characters
234
+
235
+ get_pitch_user_prompt("Squeaky cleaning Services", "https://squeakycleaningservices.co.uk")
236
+
237
def pitch_deck(company_name, url):
    """Generate a pitch deck for the company and render it as Markdown.

    Sends the system prompt and the scraped-content user prompt to the chat
    model in a single (non-streaming) call and displays the reply.
    """
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_pitch_user_prompt(company_name, url)},
        ],
    )
    deck_markdown = completion.choices[0].message.content
    display(Markdown(deck_markdown))
247
+
248
+ pitch_deck("Squeaky Cleaning Services", "https://squeakycleaningservices.co.uk")
249
+
250
+ """## Finally - a minor improvement
251
+
252
+ With a small adjustment, we can change this so that the results stream back from OpenAI,
253
+ with the familiar typewriter animation
254
+ """
255
+
256
def stream_pitchdeck(company_name, url):
    """Generate a pitch deck and render it incrementally as the reply streams in.

    Args:
        company_name: name used in the user prompt.
        url: the company's landing page.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_pitch_user_prompt(company_name, url)}
        ],
        stream=True
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        raw += chunk.choices[0].delta.content or ''
        # Strip only the code-fence markers. The cleanup is applied to a copy
        # of the untouched raw text so a fence split across chunks is still
        # matched, and the word "markdown" in real content is left alone.
        shown = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(shown), display_id=display_handle.display_id)
272
+
273
+ stream_pitchdeck("Squeaky Cleaning Services", "https://squeakycleaningservices.co.uk")
274
+
275
+ # Try
276
+
277
+ # stream_pitchdeck("Squeaky Cleaning Services", "https://squeakycleaningservices.co.uk")
278
+
279
+ # prompt: Generate Gradio code from the function
280
+
281
# `!pip install` is notebook shell magic and a syntax error in a plain .py
# file; install gradio beforehand instead (as with the other pip lines at the
# top of this file).
# !pip install gradio -q
import gradio as gr
283
+
284
def get_links_user_prompt(website):
    """Return the user prompt asking the model to pick brochure-relevant links.

    `website` must expose `url` and `links`; the links are appended one per line.
    """
    pieces = [
        f"Here is the list of links on the website of {website.url} - ",
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n",
        "Links (some might be relative links):\n",
        "\n".join(website.links),
    ]
    return "".join(pieces)
291
+
292
# System prompt for the pitch-deck generation call used by the Gradio app.
# A backslash at the end of a line inside the string joins it to the next line
# with nothing in between, so each continued line ends with an explicit space.
system_prompt = """
You are a business analyst specializing in creating compelling pitch decks from company website content. Respond in markdown. \
Your task is to analyze multiple relevant pages from a company's website and synthesize the information into a concise, \
professional pitch deck that the company can use for business development, investor relations.

Your pitch deck should be formatted in markdown and include the following sections when information is available:
- Company overview and mission
- Products/services and value proposition
- Target market and customer base
- Company culture and values
- Growth potential and achievements
- Contact information
- End with We are always happy to help you with (list the services again)

Focus on creating a narrative that highlights the company's strengths, unique positioning, and opportunities for partnership, \
investment. Ensure the content is professional, engaging, and suitable for multiple audiences including potential clients and investors."""
308
+
309
def get_all_details(url):
    """Gather the landing page and each selected link's page into one string.

    Links without an absolute http(s) URL are noted as invalid, and a page
    that raises while being scraped is noted with the error; neither stops
    the remaining links from being processed.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    found = get_links(url)
    print("Found links:", found)
    for entry in found.get("links", []):
        sections.append(f"\n\n{entry.get('type', 'Unknown Type')}\n")
        target = entry.get('url')
        # Ensure the URL is valid before attempting to scrape
        if not (target and target.startswith(('http://', 'https://'))):
            sections.append(f"Invalid URL found: {target}\n")
            continue
        try:
            sections.append(Website(target).get_contents())
        except Exception as e:
            sections.append(f"Could not retrieve content for {target}: {e}\n")
    return "".join(sections)
325
+
326
def get_pitch_user_prompt(company_name, url):
    """Compose the user prompt for the pitch-deck call, capped at 5,000 characters.

    Returns the fixed message "Could not retrieve website details to generate
    pitch deck." (after printing the error) when collecting the website
    details raises.
    """
    header = (
        f"You are looking at a company called: {company_name}\n"
        "Here are the contents of its landing page and other relevant pages; use this information to build a short pitch deck for the company, of the company in markdown.\n"
    )
    try:
        prompt = header + get_all_details(url)
    except Exception as e:
        print(f"Error getting website details for {url}: {e}")
        # Provide a fallback or stop processing if website cannot be scraped
        return "Could not retrieve website details to generate pitch deck."

    return prompt[:5_000]  # Truncate if more than 5,000 characters
339
+
340
+
341
def generate_pitch_deck(company_name, url):
    """Stream a markdown pitch deck for the company, for use as a Gradio handler.

    This function is a generator: it yields the text accumulated so far each
    time a new piece of the model's reply arrives. Because it is a generator,
    every message meant for the user -- validation and error messages
    included -- is yielded; a value passed to `return` would only be attached
    to StopIteration, which ordinary iteration discards.

    Args:
        company_name: name of the company (must be non-empty).
        url: the company's website URL (must be non-empty).

    Yields:
        Either a single validation/error message, or the progressively
        growing markdown reply.
    """
    if not company_name or not url:
        yield "Please provide both company name and URL."
        return

    user_prompt = get_pitch_user_prompt(company_name, url)

    # get_pitch_user_prompt signals a scraping failure with this fixed message.
    if user_prompt == "Could not retrieve website details to generate pitch deck.":
        yield user_prompt
        return

    try:
        response = openai.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            stream=True  # Enable streaming for better UX in Gradio
        )

        # Stream the response
        full_response = ""
        for chunk in response:
            content = chunk.choices[0].delta.content
            if content is not None:
                full_response += content
                yield full_response  # Yield intermediate results for streaming
    except Exception as e:
        yield f"An error occurred during OpenAI API call: {e}"
371
+
372
# Gradio front end: two text inputs feed generate_pitch_deck, whose output is
# rendered as Markdown.
_company_name_box = gr.Textbox(label="Company Name")
_website_url_box = gr.Textbox(label="Company Website URL")
_deck_output = gr.Markdown(label="Generated Pitch Deck")

iface = gr.Interface(
    fn=generate_pitch_deck,
    inputs=[_company_name_box, _website_url_box],
    outputs=_deck_output,
    title="Company Pitch Deck Generator",
    description="Enter the company name and website URL to generate a pitch deck based on their website content.",
)

iface.launch()