Spaces:
Sleeping
Sleeping
| import re | |
| import openai | |
| import concurrent.futures | |
| import os | |
| import sys | |
# One transcript line: optional indentation, a timestamp written as an integer
# or a decimal, the literal " seconds - ", then the spoken text.
_TRANSCRIPT_LINE = re.compile(r"\s*(\d+(?:\.\d+)?) seconds - (.*)")


def convert_to_dict(text):
    """Parse transcript lines such as "6.08 seconds - Yeah, ..." into a dictionary.

    Args:
        text: A string holding one "<seconds> seconds - <utterance>" entry per
            line. The timestamp may be an integer ("6") or a decimal ("6.08"),
            and a line may be indented.

    Returns:
        A dictionary mapping each timestamp (as a float) to its utterance.
        Lines that do not follow the format are skipped; when a timestamp
        repeats, the last utterance wins.
    """
    result = {}
    for line in text.splitlines():
        match = _TRANSCRIPT_LINE.match(line)
        if match:
            # Distinct local names keep the `text` argument from being rebound
            # while its lines are still being iterated.
            seconds, utterance = match.groups()
            result[float(seconds)] = utterance
    return result
def process_dict(text, batch_size=20):
    """Group transcript entries into batches and join each batch into one string.

    Args:
        text: Either the raw transcript text (one "<seconds> seconds - <utterance>"
            entry per line, parsed with convert_to_dict) or an already-parsed
            dictionary mapping seconds to text.
        batch_size: The number of consecutive entries combined into a single
            string. A value below 1 never completes a batch, so every entry
            ends up in one combined string.

    Returns:
        A new dictionary mapping the timestamp of the first entry in each batch
        to the space-joined text of that batch. The final batch may hold fewer
        than batch_size entries.
    """
    # Accept a parsed dictionary as well as raw text, so callers that already
    # ran convert_to_dict do not fail on `.splitlines()`.
    entries = text if isinstance(text, dict) else convert_to_dict(text)

    result = {}
    current_batch = []
    current_key = None
    for seconds, utterance in entries.items():
        if current_key is None:
            # The first entry of a batch supplies the key for the whole batch.
            current_key = seconds
        current_batch.append(utterance)
        if len(current_batch) == batch_size:
            result[current_key] = " ".join(current_batch)
            current_batch = []
            current_key = None

    # Flush the trailing, partially filled batch.
    if current_batch:
        result[current_key] = " ".join(current_batch)
    return result
def call3(chunk):
    """Ask the chat model for the key topics of one podcast transcript chunk.

    Args:
        chunk: The transcript chunk; it is converted with str() before sending.

    Returns:
        The message content of the first choice in the API response.
    """
    system_message = {
        "role": "system",
        "content": "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible.",
    }
    user_message = {"role": "user", "content": str(chunk)}

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def run_gpt_3(dict_in, function=call3):
    """Apply a function to every value of a dictionary using a thread pool.

    Args:
        dict_in: A dictionary mapping keys to values.
        function: A callable that takes one value and returns its result.

    Returns:
        A dictionary with the same keys as dict_in, each mapped to the result
        of calling function on the corresponding value.
    """
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # Submit every value before waiting on any result so the calls can
        # overlap; results are then collected in the input's key order.
        pending = {key: pool.submit(function, value) for key, value in dict_in.items()}
        return {key: job.result() for key, job in pending.items()}
def call4(chunk):
    """Ask the chat model for show notes (hook, summary, chapters) of a podcast.

    The system prompt requests output in a Python-dictionary-like layout, but
    this function returns the reply as-is without parsing it.

    Args:
        chunk: The timestamped gist of the podcast; it is converted with str()
            before sending.

    Returns:
        The message content of the first choice in the API response.
    """
    system_prompt = """You are a podcast summarizer. You will be given the chunked gist of a long podcast, each chunk will have it's timestamp in seconds.
Output in a python dictionary format whose structure is this:
{
hook: "the hook"
summary: "summary"
chapters: {
timestamp : "chapter"
timestamp : "chapter"
}
}when
hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
Ex.One serendipitous relationship led him to start a company & change his life forever.
summary: Include main talking points and key phrases that will appeal to your
ideal listener. keep it concise.
chapters: analyze the whole input, and extract only the most important topics. Remove as much filler and unnecessary info and details as possible.
"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.3,
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def clean_and_concatenate_dict_values(dict_in):
    """Flatten a dictionary into one "key: value" line per entry.

    Each value has its surrounding whitespace stripped and every occurrence of
    "- " removed before it is written out.

    Args:
        dict_in: A dictionary mapping keys to string values.

    Returns:
        A single string with one newline-terminated "key: value" line per
        entry, in the dictionary's iteration order.
    """
    lines = []
    for key, value in dict_in.items():
        cleaned = value.strip().replace("- ", "")
        lines.append(f"{key}: {cleaned}\n")
    return "".join(lines)
| # text = """ | |
| # 6.08 seconds - Yeah, the Jack Carr one was pretty fun. | |
| # 11.32 seconds - He's super nice. | |
| # 16.56 seconds - I'm really enjoying this book. | |
| # 21.80 seconds - I can't wait to see what happens next. | |
| # 27.04 seconds - This is a great read. | |
| # 32.28 seconds - I highly recommend it to anyone who enjoys thrillers. | |
| # """ | |
| # result = convert_to_dict(text) | |
| # new_result = process_dict(text) | |
| # # print(list(new_result.values())[7]) | |
| # new_result |