# deckhub_demo / app.py
# Page residue from the hosting site, kept for reference:
#   cheeeeese's picture
#   Update app.py
#   cb567b6
# -*- coding: utf-8 -*-
import gradio as gr
from pptx import Presentation
import io
from pptx.enum.shapes import MSO_SHAPE_TYPE
import pandas as pd
import openai
import json
import re
import os
# Configure the OpenAI client from the OPENAI_API_KEY environment variable.
# os.environ.get returns None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def get_filename(file_obj):
    """Gradio callback: forward the uploaded file's ``name`` to ``get_text``.

    Args:
        file_obj: Object exposing a ``name`` attribute (presumably the
            temp-file wrapper Gradio passes for a "file" input -- confirm
            against the installed Gradio version).

    Returns:
        Whatever ``get_text`` returns for that name.
    """
    uploaded_name = file_obj.name
    return get_text(uploaded_name)
def _table_rows(table):
    """Return the cell texts of a table as a list of rows (lists of str)."""
    row_count = len(table.rows)
    col_count = len(table.columns)
    return [
        [table.cell(r, c).text for c in range(col_count)]
        for r in range(row_count)
    ]


def _slide_text(slide):
    """Return all text of one slide followed by a delimiter line.

    Text of every shape exposing ``text`` comes first (title and body are
    not distinguished), then the ``str()`` of each table's rows, then the
    delimiter.
    """
    parts = []
    # First pass: plain text of every shape that has any.
    for shape in slide.shapes:
        if hasattr(shape, "text"):
            parts.append(shape.text + "\n")
    # Second pass: tables, appended after the text so the order of the
    # generated prompt stays the same as before.
    for shape in slide.shapes:
        if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
            parts.append(str(_table_rows(shape.table)))
    # Delimiter between slides.
    parts.append('\n---------------\n')
    return "".join(parts)


def get_text(ppt_file):
    """Generate speaker scripts for a deck and save them as slide notes.

    The text of every slide is extracted, appended to the prompt read from
    the ``PROMPT`` environment variable, and sent to the OpenAI chat
    completion endpoint. The reply is written into the slides' notes by
    ``preprocessing_script`` and the deck is saved as
    ``edited_ppt_file.pptx`` in the current working directory.

    Args:
        ppt_file: Value handed to ``Presentation(...)``; ``get_filename``
            passes the uploaded file's ``name``.

    Returns:
        A ``(response_text, saved_path)`` tuple: the model's reply and the
        path of the saved presentation.

    Raises:
        RuntimeError: If the ``PROMPT`` environment variable is not set.
        Exception: If the presentation has more than 5 slides.
    """
    prompt = os.environ.get("PROMPT")
    if prompt is None:
        # Previously this surfaced later as an opaque
        # "NoneType + str" TypeError when building the message.
        raise RuntimeError("The PROMPT environment variable is not set.")

    presentation = Presentation(ppt_file)
    if len(presentation.slides) > 5:
        raise Exception("Max slide length is 5. Please follow the guideline.")

    try:
        result = [_slide_text(slide) for slide in presentation.slides]
    except Exception as e:
        # Best effort, as before: the error text replaces the slide text.
        # It must stay a list -- joining a bare string would insert a space
        # between every character of the message.
        result = [f"Error: {e}"]

    # Build the request from the prompt plus the extracted slide contents.
    msg_fb = prompt + "- Input:\n" + ' '.join(result)
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": msg_fb},
        ],
        temperature=0.3,
        frequency_penalty=0,
        presence_penalty=0,
        top_p=0.2,
    )
    script = response.choices[0].message.content

    # Write the scripts into the slide notes and save the edited deck.
    done_presentation = preprocessing_script(script, presentation)
    done_presentation.save('edited_ppt_file.pptx')
    return script, 'edited_ppt_file.pptx'
def preprocessing_script(response, presentation):
    """Write the per-slide scripts found in ``response`` into slide notes.

    ``response`` is split on headers of the form ``- Slide <n> Scripts:``.
    The text following each header (stripped of surrounding whitespace)
    becomes the notes text of slide ``n`` (1-based). Text before the first
    header is ignored.

    Args:
        response: Text containing zero or more ``- Slide <n> Scripts:``
            headers, each followed by that slide's script.
        presentation: Object whose ``slides`` supports ``len()`` and
            indexing, each slide exposing
            ``notes_slide.notes_text_frame.text``.

    Returns:
        The same ``presentation`` object, with notes updated in place.
    """
    # Because the pattern has one capture group, re.split yields
    # [preamble, number, script, number, script, ...].
    segments = re.split(r"- Slide (\d+) Scripts:", response)
    slide_count = len(presentation.slides)

    for number, script in zip(segments[1::2], segments[2::2]):
        slide_num = int(number)
        # The slide number comes from generated text, so it may not match
        # the deck: skip anything out of range instead of raising
        # IndexError, and do not let 0 become index -1.
        if not 1 <= slide_num <= slide_count:
            continue
        slide = presentation.slides[slide_num - 1]
        slide.notes_slide.notes_text_frame.text = script.strip()
    return presentation
# Alternative Blocks-based upload UI, currently disabled:
# with gr.Blocks() as demo:
#     upload_button = gr.UploadButton("Click to Upload a File", file_types=[".pptx"], file_count="multiple")
#     upload_button.upload(get_filename, upload_button, "text")
#     output = gr.Textbox(label='output_box')

# Single-file interface: one uploaded file in, the generated text and the
# edited presentation file out.
demo = gr.Interface(
    fn=get_filename,
    inputs="file",
    outputs=["text", "file"],
    examples=[["test_final.pptx"]],
)
demo.launch()