# summarizer / app.py
# gera — commit 981cdfb: "OPENAI_API_KEY rename, and automatic"
import gradio as gr
from openai import OpenAI
import tiktoken
from os import getenv as os_getenv
from json import loads as json_loads
from pathlib import Path
import fitz
MODEL = 'gpt-4-turbo'
PRICE_PER_M = 10.00
LIMIT = 125000 # some space for answer
api_key = os_getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
def new_state():
    """Create a fresh per-session Gradio state with an empty book prompt."""
    initial = {"prompt": ""}
    return gr.State(initial)
def get_prompt(books, question=None):
    """Build the instruction prompt sent to the model.

    Args:
        books: Concatenated text of all uploaded books.
        question: Optional question(s) to append at the end of the prompt.
            (Previously this parameter was accepted but silently ignored.)

    Returns:
        The full prompt string: instructions, then the book text, then the
        optional questions section.
    """
    prompt = (
        "Read the following books.\n"
        "Each book may have some pages at the beginning with data about the book, "
        "an index, or table of content, etc. "
        "Pages may have a header and/or a footer. Consider all this maybe present. "
        "For each book, please answer, all below in the suggested format and also "
        "answer all the questions at the end in detail, if present.\n"
        "Answer in the language of the book:\n"
        "**Title**: ...\n"
        "**Author**: ...\n"
        "**Chapter Names**: ...\n"
        "**Characters**: \n"
        "**Detailed Summary of the whole book**: \n"
    )
    prompt += f"{books}\n"
    if question:
        # Include caller-supplied questions so the model actually sees them.
        prompt += f"**Questions**: {question}\n"
    return prompt
def chat(message, history, files, state):
    """Stream a chat completion grounded in the uploaded books.

    Args:
        message: The user's new message.
        history: Gradio chat history as (human, assistant) pairs.
        files: Uploaded files (unused here; the prompt comes from state).
        state: Session dict whose "prompt" key holds the book prompt.

    Yields:
        The progressively assembled assistant reply (streaming).

    Raises:
        gr.Error: On an empty follow-up question or when no book was uploaded.
    """
    history_openai_format = []
    prompt = state["prompt"]
    if not message:
        if len(history) > 0:
            # Must be *raised*, not just instantiated, for Gradio to show it.
            raise gr.Error("You sent an empty question. It's expensive, don't do it")
        return
    if not prompt:
        raise gr.Error("First upload a book")
    if not history:
        # First turn: fold the user's question into the stored book prompt so
        # the whole book + question go out as a single user message.
        prompt += f"**Questions**:{message}"
        state["prompt"] = prompt
        message = prompt
    for human, assistant in history:
        if not history_openai_format:
            # The recorded first user turn is replaced by the full book prompt.
            history_openai_format.append({"role": "user", "content": prompt})
        elif human:
            history_openai_format.append({"role": "user", "content": human})
        if assistant:
            history_openai_format.append({"role": "assistant", "content": assistant})
    if message:
        history_openai_format.append({"role": "user", "content": message})
    response = client.chat.completions.create(
        model=MODEL,
        messages=history_openai_format,
        temperature=1.0,
        stream=True)
    partial_message = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message
def get_text(filename):
    """Extract plain text from an uploaded file.

    Supports PDF (via PyMuPDF, one "### Page #n" section per page) and
    plain-text files. Any other extension yields an empty string.

    Args:
        filename: Path to the uploaded file.

    Returns:
        The extracted text (empty string for unsupported types).
    """
    path = Path(filename)
    suffix = path.suffix
    answer = ""
    if suffix == ".pdf":
        # Context manager closes the document; the original leaked the handle.
        with fitz.open(filename) as doc:
            for i, page in enumerate(doc):
                answer += f"\n### Page #{i+1}\n{page.get_text()}\n"
    elif suffix == ".txt":
        # read_text() opens and closes the file; the original never closed it.
        answer = path.read_text()
    return answer
def files_ready(filenames, state):
    """Build the prompt from the uploaded files and report its size and cost.

    Args:
        filenames: Paths of the uploaded files.
        state: Session dict; its "prompt" key is updated on success.

    Returns:
        (letter_count, token_count, cost_label, state) for the UI fields.

    Raises:
        gr.Error: If the prompt exceeds the LIMIT token budget.
    """
    # Use the shared MODEL constant instead of a second hard-coded model name.
    encoder = tiktoken.encoding_for_model(MODEL)
    books = ''
    for i, name in enumerate(filenames):
        books += f"\n## Document #{i+1}\nName: {Path(name).name}\n"
        books += get_text(name)
    prompt = get_prompt(books)
    tokens = len(encoder.encode(prompt))
    cost = tokens * PRICE_PER_M / 1000000 * 2  # * 2 is too much for an answer
    if tokens > LIMIT:
        # Fail before storing the prompt so an oversized book can't be queried.
        raise gr.Error(f"Book is too long. It's {tokens} tokens long and can't be more than {LIMIT}.")
    state["prompt"] = prompt
    return len(prompt), tokens, f"${cost}", state
def files_changed(filenames, state):
    """Reset the size/cost fields when the upload list changes.

    Returns placeholders while files are present but not yet processed, or
    zeroed counters and a fresh session state when all files were removed.
    """
    if not filenames:
        # Upload list cleared: zero the counters and start a clean session.
        return 0, 0, "$0", new_state()
    # Files present (processing happens in files_ready): show placeholders.
    return "-", "-", "-", state
with gr.Blocks(title="Book summarization and more") as demo:
state = new_state()
with gr.Row():
files = gr.Files(file_types=["txt","doc","docx","pdf"] )
with gr.Column():
letters = gr.Text("0", label="Letters (with spaces)")
tokens = gr.Text("0", label="Tokens")
cost = gr.Text("0", label="Cost")
chat = gr.ChatInterface(
fn=chat,
title="Summarization and more",
additional_inputs=[files, state],
multimodal=False)
other = gr.Button(interactive=False)
files.upload(files_ready, [files, state], [letters, tokens, cost, state])
files.change(files_changed, [files, state], [letters, tokens, cost, state])
auth=os_getenv("APP_USERS", "null")
auth=json_loads(auth)
demo.launch(auth=auth)