Spaces:
Sleeping
Sleeping
File size: 4,379 Bytes
45cec28 0932564 ba4f426 0932564 4a99335 3a09f3c 4a99335 981cdfb b10341b 45cec28 e46e576 ba4f426 e46e576 ba4f426 4a99335 ba4f426 e46e576 0932564 ba4f426 e46e576 4cf0c03 e46e576 4cf0c03 e46e576 ba4f426 e46e576 4cf0c03 0932564 4a99335 0932564 45cec28 ba4f426 e46e576 ba4f426 4a99335 ba4f426 4a99335 e46e576 ba4f426 4a99335 e46e576 ba4f426 e46e576 4a99335 e46e576 4a99335 e46e576 ba4f426 e46e576 ba4f426 4a99335 3a09f3c 4a99335 ba4f426 e46e576 ba4f426 e46e576 ba4f426 45cec28 6a7b15a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import gradio as gr
from openai import OpenAI
import tiktoken
from os import getenv as os_getenv
from json import loads as json_loads
from pathlib import Path
import fitz
# OpenAI model used for both token counting and chat completion.
MODEL = 'gpt-4-turbo'
# Price in USD per million tokens — presumably the input-token rate for
# this model at the time of writing; verify against current OpenAI pricing.
PRICE_PER_M = 10.00
LIMIT = 125000 # some space for answer (max prompt tokens, leaving context-window headroom)
# API key is read from the environment; None if unset (client calls will then fail).
api_key = os_getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
def new_state():
    """Return a fresh Gradio session state with an empty book prompt."""
    initial = {"prompt": ""}
    return gr.State(initial)
def get_prompt(books, question = None):
    """Build the instruction prompt sent to the LLM for the uploaded books.

    Args:
        books: Concatenated text of all uploaded documents.
        question: Optional questions to append at the end of the prompt.
            Previously this parameter was accepted but silently ignored;
            passing None (the default) preserves the old behavior.

    Returns:
        The complete prompt string (instructions, then the book text,
        then the optional questions).
    """
    prompt = (
        "Read the following books.\n"
        "Each book may have some pages at the beginning with data about the book, "
        "an index, or table of content, etc. "
        "Pages may have a header and/or a footer. Consider all this maybe present.\n"
        "For each book, please answer, all below in the suggested format and also "
        "answer all the questions at the end in detail, if present.\n"
        "Answer in the language of the book:\n"
        "**Title**: ...\n"
        "**Author**: ...\n"
        "**Chapter Names**: ...\n"
        "**Characters**: \n"
        "**Detailed Summary of the whole book**: \n"
    )
    prompt += f"{books}\n"
    if question:
        # Same marker format used by chat() when it folds in the first question.
        prompt += f"**Questions**:{question}"
    return prompt
def chat(message, history, files, state):
    """Stream an LLM answer about the uploaded book(s).

    Args:
        message: The user's current question (may be empty).
        history: List of (user, assistant) message pairs from the ChatInterface.
        files: Uploaded files (unused here — the prompt is prebuilt into state
            by files_ready; kept for the ChatInterface additional_inputs wiring).
        state: Session dict; state["prompt"] holds the book prompt.

    Yields:
        The partially accumulated assistant answer as chunks stream in.

    Raises:
        gr.Error: On an empty follow-up question, or when no book was uploaded.
    """
    prompt = state["prompt"]
    if not message and history:
        # An empty follow-up would still trigger a paid API call — refuse it.
        # gr.Error must be *raised* to surface in the UI; the original
        # constructed it without raising, so the popup never appeared.
        raise gr.Error("You sent an empty question. It's expensive, don't do it")
    if not prompt:
        raise gr.Error("First upload a book")
    if not history and message:
        # First turn: fold the user's question into the big book prompt and
        # persist it so later turns replay the same first message.
        prompt += f"**Questions**:{message}"
        state["prompt"] = prompt
        message = prompt
    history_openai_format = []
    for human, assistant in history:
        if not history_openai_format:
            # The visible first user turn is replaced by the full book prompt.
            history_openai_format.append({"role": "user", "content": prompt})
        elif human:
            history_openai_format.append({"role": "user", "content": human})
        if assistant:
            history_openai_format.append({"role": "assistant", "content": assistant})
    if message:
        history_openai_format.append({"role": "user", "content": message})
    response = client.chat.completions.create(
        model=MODEL,
        messages=history_openai_format,
        temperature=1.0,
        stream=True)
    partial_message = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message = partial_message + delta
            yield partial_message
def get_text(filename):
    """Extract plain text from an uploaded file.

    Supports PDF (via PyMuPDF, with a "### Page #N" header per page) and
    plain-text files. Any other extension returns an empty string.

    Args:
        filename: Path to the uploaded file.

    Returns:
        The extracted text, or "" for unsupported file types.
    """
    path = Path(filename)
    suffix = path.suffix
    answer = ""
    if suffix == ".pdf":
        # Context manager closes the document; the original leaked the handle.
        with fitz.open(filename) as doc:
            for i, page in enumerate(doc):
                answer += f"\n### Page #{i+1}\n{page.get_text()}\n"
    elif suffix == ".txt":
        # read_text opens and closes the file; the bare open().read() did not close it.
        answer = path.read_text()
    return answer
def files_ready(filenames, state):
    """Tokenize the uploaded books and report size and cost to the UI.

    Args:
        filenames: Paths of the uploaded files.
        state: Session dict; receives the generated prompt on success.

    Returns:
        (character count, token count, cost string, state).

    Raises:
        gr.Error: When the prompt exceeds the model's token LIMIT.
    """
    # Use the shared MODEL constant (was hard-coded; also dropped the
    # redundant `encoder = encoding = ...` double assignment).
    encoder = tiktoken.encoding_for_model(MODEL)
    books = ''
    for i, name in enumerate(filenames):
        books += f"\n## Document #{i+1}\nName: {Path(name).name}\n"
        books += get_text(name)
    prompt = get_prompt(books)
    tokens = len(encoder.encode(prompt))
    cost = tokens * PRICE_PER_M / 1000000 * 2 # * 2 is too much for an answer
    if tokens > LIMIT:
        # Reject before storing, so an oversized prompt never reaches chat().
        raise gr.Error(f"Book is too long. It's {tokens} tokens long and can't be more than {LIMIT}.")
    state["prompt"] = prompt
    return len(prompt), tokens, f"${cost}", state
def files_changed(filenames, state):
    """React to the file list changing: placeholders while files are present,
    zeroed counters and a clean session once everything is removed."""
    if not filenames:
        # All files removed — reset counters and start a fresh session state.
        return 0, 0, "$0", new_state()
    # Files present but not yet processed: show placeholder values.
    return "-", "-", "-", state
# UI wiring: file upload column on the left, token/cost counters, and a
# chat interface driven by the chat() generator above.
with gr.Blocks(title="Book summarization and more") as demo:
    # Per-session state holding the generated book prompt.
    state = new_state()
    with gr.Row():
        files = gr.Files(file_types=["txt","doc","docx","pdf"] )
        with gr.Column():
            # Counters filled in by files_ready after an upload.
            letters = gr.Text("0", label="Letters (with spaces)")
            tokens = gr.Text("0", label="Tokens")
            cost = gr.Text("0", label="Cost")
    # NOTE(review): this rebinds the module-level name `chat`, shadowing the
    # chat() function defined above; fn=chat still captures the function
    # because it is evaluated first.
    chat = gr.ChatInterface(
        fn=chat,
        title="Summarization and more",
        additional_inputs=[files, state],
        multimodal=False)
    other = gr.Button(interactive=False)
    # upload: tokenize and price the books; change: reset counters on removal.
    files.upload(files_ready, [files, state], [letters, tokens, cost, state])
    files.change(files_changed, [files, state], [letters, tokens, cost, state])
# Optional auth taken from APP_USERS — presumably JSON (e.g. a list of
# (user, password) pairs); "null" decodes to None, which disables auth.
# TODO confirm the expected format against Gradio's launch(auth=...) docs.
auth=os_getenv("APP_USERS", "null")
auth=json_loads(auth)
demo.launch(auth=auth)
|