# Page-scrape header (not code): author "hivecorp"; commit message "Create app.py"; commit 7b90f0f (verified).
import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
# Checkpoint identifier shared by the tokenizer and the language model so the
# two always come from the same pretrained release.
model_name = "openai-community/gpt2"

# Loaded once at import time; the tokenizer is built first, then the model.
tokenizer, model = (
    GPT2Tokenizer.from_pretrained(model_name),
    GPT2LMHeadModel.from_pretrained(model_name),
)
# Upper bound on the number of tokens generated per sentence. The prompt's own
# tokens are NOT counted against this budget.
_MAX_NEW_TOKENS = 50


def generate_transcript(input_text):
    """Generate a GPT-2 continuation for every sentence of a transcript.

    The input is split naively on ``'. '``; each non-blank fragment is used
    as a prompt for the module-level ``model``, and the decoded outputs
    (prompt plus continuation) are joined back together with ``'. '``.

    Args:
        input_text: Transcript text entered by the user. ``None``, empty, or
            whitespace-only input is accepted.

    Returns:
        The combined generated transcript, or ``""`` when there is nothing
        to generate from.
    """
    if not input_text or not input_text.strip():
        return ""

    # Blank fragments (e.g. from trailing separators) would produce a
    # zero-length prompt tensor, which generate() cannot handle.
    sentences = [part.strip() for part in input_text.split('. ')]
    sentences = [part for part in sentences if part]

    # Reserve room in the model's context window for the generated tokens so
    # an unusually long "sentence" (e.g. a transcript with no periods) cannot
    # overflow the position embeddings.
    max_prompt_tokens = model.config.n_positions - _MAX_NEW_TOKENS

    new_transcripts = []
    for sentence in sentences:
        encoded = tokenizer(
            sentence,
            return_tensors='pt',
            truncation=True,
            max_length=max_prompt_tokens,
        )
        with torch.no_grad():
            output = model.generate(
                encoded['input_ids'],
                # Passed explicitly: GPT-2 has no pad token, so the mask
                # cannot be inferred reliably from the input ids.
                attention_mask=encoded['attention_mask'],
                # max_new_tokens (rather than max_length) guarantees a
                # continuation even when the prompt itself is 50+ tokens.
                max_new_tokens=_MAX_NEW_TOKENS,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
            )
        new_transcripts.append(
            tokenizer.decode(output[0], skip_special_tokens=True)
        )

    # Combine all outputs to make the final transcript.
    return '. '.join(new_transcripts)
# Web UI definition: a ten-line input box for the source transcript and a
# plain text box for the generated result, both routed through
# generate_transcript.
iface = gr.Interface(
    title="Transcript Generator",
    description=(
        "Enter a video transcript, and this app will generate a new, "
        "similar transcript using GPT-2."
    ),
    fn=generate_transcript,
    inputs=gr.Textbox(lines=10, placeholder="Enter video transcript here..."),
    outputs=gr.Textbox(),
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()