Spaces:
Build error
Build error
| import json | |
| import re | |
| import textwrap | |
| import openai | |
| import gradio as gr | |
def gpt3_completion(prompt, engine='text-davinci-002', temp=0.6, top_p=1.0, tokens=1000, freq_pen=0.25, pres_pen=0.0, stop=['<<END>>']):
    """Request one completion from the OpenAI Completion endpoint.

    The sampling arguments are forwarded to the API call as given, so
    callers can actually tune them (they were previously accepted but
    overridden by hard-coded values).

    Args:
        prompt: Text sent to the model.
        engine: Name of the OpenAI engine to query.
        temp: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        tokens: Upper bound on the number of generated tokens.
        freq_pen: Frequency penalty.
        pres_pen: Presence penalty.
        stop: Stop sequences. The default list is only read, never
            mutated, so sharing it between calls is safe.

    Returns:
        The text of the first returned choice, stripped, with every run
        of whitespace collapsed to a single space.

    Errors raised by ``openai.Completion.create`` are not caught here and
    propagate to the caller.
    """
    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        temperature=temp,
        max_tokens=tokens,
        top_p=top_p,
        frequency_penalty=freq_pen,
        presence_penalty=pres_pen,
        stop=stop,
    )
    text = response['choices'][0]['text'].strip()
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    return re.sub(r'\s+', ' ', text)
# Placeholder labels substituted for speaker names, in order of first
# appearance (kept exactly as the original list, which has 23 entries).
_SPEAKER_LABELS = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M",
                   "N", "O", "P", "Q", "R", "S", "T", "V", "X", "Y", "Z"]

# A cue-timing pair. Accepts the bare "start end" form the original pattern
# matched as well as the standard WebVTT "start --> end" form; the hours
# field is optional, as WebVTT allows.
_CUE_TIMING_RE = re.compile(
    r"(?:\d{2,}:)?\d\d:\d\d\.\d\d\d[ \t]+(?:-->[ \t]+)?(?:\d{2,}:)?\d\d:\d\d\.\d\d\d"
)

# Instruction text placed between the speaker legend and the transcript chunk.
_PROMPT_INSTRUCTIONS = (
    "\n\n"
    "Summarize the portion of the podcast. The summary should be around 200 words. "
    "use the Name instead of A, B, C...\n"
    "Podcast:\n"
)


def _speaker_label(index):
    """Return the placeholder label for the speaker at zero-based ``index``."""
    if index < len(_SPEAKER_LABELS):
        return _SPEAKER_LABELS[index]
    # More speakers than letters: use a numbered label instead of raising
    # IndexError.
    return "S" + str(index + 1)


def _clean_transcript(raw):
    """Drop the WEBVTT header, blank lines and cue timings from ``raw``."""
    text = raw.replace("WEBVTT", "")
    text = "\n".join(filter(None, text.splitlines()))
    return _CUE_TIMING_RE.sub("", text)


def _speaker_names(text):
    """Return the distinct non-empty ``name:`` prefixes of ``text``'s lines, in first-seen order."""
    names = []
    seen = set()
    for line in text.split("\n"):
        prefix, colon, _ = line.partition(":")
        name = prefix.strip()
        # An empty name must be skipped: replacing "" would insert the label
        # between every character of the transcript.
        if colon and name and name not in seen:
            seen.add(name)
            names.append(name)
    return names


def _replace_speakers(text, names):
    """Replace whole-word occurrences of each name in ``names`` with its label, in one pass."""
    if not names:
        return text
    labels = {name: _speaker_label(i) for i, name in enumerate(names)}
    # Longest alternative first so "Ann Lee" is not consumed by "Ann"; a
    # single pass means an inserted label is never itself rewritten.
    alternatives = "|".join(
        re.escape(name) for name in sorted(names, key=len, reverse=True)
    )
    pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")
    return pattern.sub(lambda match: labels[match.group(0)], text)


def summarize_podcast(podcast_file):
    """Summarize an uploaded podcast transcript chunk by chunk.

    The transcript is read from ``podcast_file.name``, stripped of its
    WEBVTT header and cue timings, and speaker names (the text before the
    first ``:`` on a line) are replaced by short labels. A legend of the
    form ``Speaker N <name>`` is prepended to the transcript and to every
    prompt. The text is then wrapped into pieces of at most 8000 characters
    and each piece is summarized with ``gpt3_completion``.

    Args:
        podcast_file: Object whose ``name`` attribute is the path of the
            transcript file (presumably the temp file handed over by the
            Gradio file input - confirm against the UI wiring).

    Returns:
        One string holding, for each chunk in order, a newline followed by
        ``chunk : <n>`` and that chunk's summary. Empty when the transcript
        has no text.

    Raises:
        RuntimeError: If no OpenAI API key is configured.
    """
    import os  # local import: keeps this block self-contained

    # The key must come from configuration, never from source control.
    api_key = os.environ.get("OPENAI_API_KEY") or getattr(openai, "api_key", None)
    if not api_key:
        raise RuntimeError(
            "OpenAI API key is not configured; set the OPENAI_API_KEY environment variable"
        )
    openai.api_key = api_key

    # utf-8-sig also accepts plain UTF-8 and drops a leading byte-order mark.
    with open(podcast_file.name, "r", encoding="utf-8-sig") as f:
        transcript = _clean_transcript(f.read())

    names = _speaker_names(transcript)
    transcript = _replace_speakers(transcript, names)
    sp = "".join(
        "Speaker " + str(number) + " " + name + "\n"
        for number, name in enumerate(names, start=1)
    )
    transcript = sp + transcript

    summaries = []
    for count, chunk in enumerate(textwrap.wrap(transcript, 8000), start=1):
        prompt = sp + _PROMPT_INSTRUCTIONS + str(chunk) + "'\n\n"
        summaries.append("\n" + "chunk : " + str(count) + gpt3_completion(prompt))
    return "".join(summaries)
# Gradio front end: a single file upload goes in, the text returned by
# summarize_podcast comes out.
# NOTE(review): the gr.inputs / gr.outputs namespaces look like the legacy
# Gradio component API - confirm they exist in the pinned Gradio version.
title = "Podcast Summarizer"
description = "Summarize your podcast into a few key points using this app."
inputs = gr.inputs.File(label="Upload podcast file")
outputs = gr.outputs.Textbox(label="Summary")

gr.Interface(
    fn=summarize_podcast,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
).launch()