File size: 4,379 Bytes
45cec28
0932564
ba4f426
 
 
 
 
0932564
4a99335
 
3a09f3c
4a99335
981cdfb
b10341b
45cec28
e46e576
 
 
 
 
ba4f426
 
 
 
 
e46e576
 
ba4f426
 
 
 
4a99335
ba4f426
 
 
 
 
e46e576
0932564
ba4f426
e46e576
 
 
 
 
 
 
 
 
 
 
4cf0c03
 
 
 
e46e576
 
 
4cf0c03
 
 
e46e576
 
 
ba4f426
e46e576
 
4cf0c03
 
 
0932564
4a99335
0932564
 
 
 
 
 
 
 
 
45cec28
ba4f426
 
 
 
 
 
 
 
 
 
e46e576
ba4f426
4a99335
ba4f426
4a99335
 
 
 
 
 
e46e576
ba4f426
4a99335
 
e46e576
ba4f426
e46e576
4a99335
e46e576
4a99335
e46e576
ba4f426
 
e46e576
ba4f426
 
4a99335
3a09f3c
4a99335
 
ba4f426
 
 
 
e46e576
ba4f426
 
 
e46e576
 
ba4f426
 
 
45cec28
6a7b15a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import gradio as gr
from openai import OpenAI
import tiktoken
from os import getenv as os_getenv
from json import loads as json_loads
from pathlib import Path
import fitz

# Model used for all chat completions; files_ready's tokenizer should match it.
MODEL = 'gpt-4-turbo'
# Input price in USD per 1M tokens, used for the cost estimate shown in the UI.
PRICE_PER_M = 10.00
LIMIT = 125000 # some space for answer

# API key comes from the environment; OpenAI() would also read it implicitly,
# but passing it explicitly keeps the dependency visible.
api_key = os_getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

def new_state():
    """Create a fresh per-session Gradio state with an empty prompt."""
    initial = {"prompt": ""}
    return gr.State(initial)

def get_prompt(books, question = None):
    """Build the instruction prompt sent to the model.

    Args:
        books: Concatenated text of all uploaded books.
        question: Unused; kept for backward compatibility with callers.

    Returns:
        The full prompt: fixed instructions followed by the book text.
    """
    # Adjacent string literals concatenate implicitly; no '+' or f-strings
    # needed since nothing is interpolated here.
    prompt = (
        "Read the following books.\n"
        # "beginning" typo fixed; was "beggining".
        "Each book may have some pages at the beginning with data about the book, an index, or table of content, etc. "
        # Newline added so this instruction no longer runs into the next one.
        "Pages may have a header and/or a footer. Consider all this maybe present.\n"
        "For each book, please answer, all below in the suggested format and also answer all the questions at the end in detail, if present.\n"
        "Answer in the language of the book:\n"
        "**Title**: ...\n"
        "**Author**: ...\n"
        "**Chapter Names**: ...\n"
        "**Characters**: \n"
        "**Detailed Summary of the whole book**: \n"
    )
    prompt += f"{books}\n"

    return prompt

def chat(message, history, files, state):
    """Stream a chat completion answering questions about the uploaded books.

    On the first turn the stored book prompt (plus the user's question, if
    any) is sent; later turns replay the visible chat history.

    Args:
        message: The user's latest message (may be empty).
        history: List of (user, assistant) pairs from gr.ChatInterface.
        files: Uploaded files (unused here; the text lives in state["prompt"]).
        state: Session dict holding the prepared prompt.

    Yields:
        The partial assistant answer, growing as chunks stream in.

    Raises:
        gr.Error: On an empty follow-up question or when no book is loaded.
    """
    history_openai_format = []

    prompt = state["prompt"]

    # Reject empty follow-ups: every call costs real tokens.
    # BUG FIX: gr.Error was constructed but never raised, so the user
    # never saw the error and got a silent empty answer instead.
    if not message and len(history) > 0:
        raise gr.Error("You sent an empty question. It's expensive, don't do it")

    if not prompt:
        raise gr.Error("First upload a book")

    if not history:
        # First turn: fold the question into the stored prompt and send it all.
        if message:
            prompt += f"**Questions**:{message}"
            state["prompt"] = prompt
        message = prompt

    for human, assistant in history:
        # The first visible user turn actually carried the full book prompt,
        # so substitute it when rebuilding the OpenAI-format history.
        if not history_openai_format:
            history_openai_format.append({"role": "user", "content": prompt})
        elif human:
            history_openai_format.append({"role": "user", "content": human})
        if assistant:
            history_openai_format.append({"role": "assistant", "content": assistant})

    if message:
        history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=MODEL,
        messages=history_openai_format,
        temperature=1.0,
        stream=True)

    # Stream the growing answer so the UI updates incrementally.
    partial_message = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message

def get_text(filename):
    """Extract plain text from an uploaded file.

    Args:
        filename: Path to a .pdf or .txt file.

    Returns:
        The extracted text; PDF pages are separated by "### Page #n"
        headings. Unrecognized extensions yield an empty string.
    """
    answer = ""
    suffix = Path(filename).suffix
    if suffix == ".pdf":
        # Context manager closes the PyMuPDF document instead of leaking it.
        with fitz.open(filename) as doc:
            for i, page in enumerate(doc):
                answer += f"\n### Page #{i+1}\n{page.get_text()}\n"
    elif suffix == ".txt":
        # BUG FIX: open(filename).read() leaked the file handle; use `with`
        # so it is closed deterministically.
        with open(filename) as f:
            answer = f.read()
    return answer

def files_ready(filenames, state):
    """Tokenize the uploaded books and report size/cost estimates.

    Args:
        filenames: Paths of the uploaded files.
        state: Session dict; state["prompt"] is filled with the built prompt.

    Returns:
        (letter count, token count, cost string, state) for the UI fields.

    Raises:
        gr.Error: When the prompt exceeds the model's context LIMIT.
    """
    # FIX: was `encoder = encoding = tiktoken.encoding_for_model('gpt-4-turbo')`
    # — a redundant double assignment with a hard-coded model name; use the
    # MODEL constant so the tokenizer always matches the chat model.
    encoder = tiktoken.encoding_for_model(MODEL)
    books = ''
    for i, name in enumerate(filenames):
        books += f"\n## Document #{i+1}\nName: {Path(name).name}\n"
        books += get_text(name)

    prompt = get_prompt(books)
    tokens = len(encoder.encode(prompt))
    # Rough estimate: input tokens priced per million, doubled to leave
    # headroom for the (also-priced) answer.
    cost = tokens * PRICE_PER_M / 1000000 * 2
    state["prompt"] = prompt

    if tokens > LIMIT:
        raise gr.Error(f"Book is too long. It's {tokens} tokens long and can't be more than {LIMIT}.")
    return len(prompt), tokens, f"${cost}", state

def files_changed(filenames, state):
    """Reset the size/cost display when the file list changes.

    Shows "-" placeholders while files are present (real numbers arrive
    via files_ready) and zeroes everything when the list is cleared.
    """
    if not filenames:
        return 0, 0, "$0", new_state()
    return "-", "-", "-", state
    
# UI layout: file uploader + live size/cost readouts on top, chat below.
with gr.Blocks(title="Book summarization and more") as demo:
    state = new_state()
    with gr.Row():
        # NOTE(review): doc/docx are accepted here but get_text only handles
        # .pdf and .txt — other types silently produce empty text; confirm.
        files = gr.Files(file_types=["txt","doc","docx","pdf"] )
        with gr.Column():
            letters = gr.Text("0", label="Letters (with spaces)")
            tokens = gr.Text("0", label="Tokens")
            cost = gr.Text("0", label="Cost")

    # NOTE: this rebinds the module-level name `chat` (the function) to the
    # ChatInterface component; fn=chat still captures the function first.
    chat = gr.ChatInterface(
        fn=chat,
        title="Summarization and more",
        additional_inputs=[files, state],
        multimodal=False)
    
    # Placeholder button, intentionally disabled.
    other = gr.Button(interactive=False)
    # upload fires after files finish uploading; change also fires on clear,
    # so files_changed handles the reset-to-zero path.
    files.upload(files_ready, [files, state], [letters, tokens, cost, state])
    files.change(files_changed, [files, state], [letters, tokens, cost, state])

# APP_USERS holds a JSON value for gradio auth (e.g. [["user","pass"], ...]);
# the "null" default parses to None, which disables authentication.
auth=os_getenv("APP_USERS", "null")
auth=json_loads(auth)

demo.launch(auth=auth)