# GPTdoc / app.py
# YvesP's picture
# improved synchro for source process
# f178e59
import asyncio
import os.path
import shutil
import tempfile

import gradio as gr
import numpy as np

import src.control.control as control
"""
==================================
A. Component part
==================================
"""
# Layout of the GPTdoc page: a three-column row whose wide middle column holds
# every visible component; the two empty outer columns only act as margins.
# NOTE(review): the original indentation was lost; the nesting below (the three
# source panels as siblings inside the middle column, the "Close" button inside
# the "add" panel) is reconstructed from the statement order -- confirm.
with gr.Blocks() as gptdoc:
    with gr.Row():
        with gr.Column():
            pass
        with gr.Column(scale=10):
            gr.Markdown("# GPTdoc")
            gr.Markdown("Master your sources when generating your documents")
            gr.Markdown("### 1. You define the plan of your document ")
            gr.Markdown("### 2. You select the sources for the text generation: from public unknown sources,"
                        " from wikipedia, or from your own files")
            gr.Markdown("### 3. You launch the text generation and, when it is ready, "
                        "you can download the generated text")
            gr.Markdown("\n\n\n")

            # ---- 1. input docs components ----
            gr.Markdown("# 1. Define the plan of your document")
            # The usage text is shown as the placeholder of the input box.
            # The context manager closes the file as soon as it has been read.
            with open('data/usage.txt', 'r') as f:
                usage = f.read()
            input_text = gr.Textbox(
                label="enter your text",
                lines=25,
                max_lines=25,
                interactive=True,
                elem_classes="selected_",
                placeholder=usage,
            )
            with gr.Row():
                upload_btn = gr.UploadButton(type='file')
                example1 = gr.Button("One task example")
                example2 = gr.Button("Several tasks example")

            # ---- 2. source components ----
            gr.Markdown("# 2. Choose the sources for the document generation")
            with gr.Column(visible=True, variant='panel') as select_col:
                gr.Markdown("### Select the sources")
                source_radio = gr.Radio(
                    choices=["Unknown sources", "My own sources"],
                    label="",
                    value="Unknown sources",
                    visible=True,
                )
            # Panel listing the documents currently used as sources (hidden at start).
            with gr.Column(visible=False, variant='panel') as db_col:
                gr.Markdown("### My sources")
                db_list_comp = gr.CheckboxGroup(
                    label="Current content",
                    # The leading space on the second literal keeps the two
                    # concatenated pieces from fusing into "takeninto".
                    info="These documents are currently your sources. Unselect the documents you don't want to be taken"
                         " into account when generating the document",
                    visible=True,
                    interactive=True,
                )
                with gr.Row():
                    db_reset_btn = gr.Button("Reset the sources", visible=False).style(full_width=False, size="sm")
                    db_add_doc_btn = gr.Button("Add new documents", visible=True).style(full_width=False, size="sm")
            # Panel used to add Wikipedia pages or local files (hidden at start).
            with gr.Column(visible=False, variant="panel") as add_col:
                gr.Markdown("### Add new documents ")
                with gr.Tab("From Wikipedia"):
                    wiki_fetch_btn = gr.Button("Search for Wikipedia pages", visible=True)
                    wiki_fetch_btn.style(full_width=False, size="sm")
                    wiki_list_comp = gr.CheckboxGroup(
                        label="Select the wiki pages",
                        info="The selected pages can be added to sources",
                        visible=False,
                        interactive=True,
                    )
                    wiki_add_to_db_btn = gr.Button("Add selection to sources", visible=False)
                    wiki_add_to_db_btn.style(full_width=False, size="sm")
                with gr.Tab("From disk"):
                    my_files_list_comp = gr.Files(
                        label="Upload own documents",
                        info="Your selected documents provide the content for generating the output document",
                        visible=True,
                    )
                    my_files_add_to_db_btn = gr.Button("Add files to sources", visible=False)
                    my_files_add_to_db_btn.style(full_width=False, size="sm")
                add_close_btn = gr.Button("Close").style(size='sm', full_width=False)

            # ---- 3. Generate (and inspect the document) ----
            gr.Markdown("# 3. Generate the document")
            generate_btn = gr.Button("Generate", interactive=True)
            output_text = gr.Textbox(
                label="Generated document",
                value="",
                lines=25,
                max_lines=25,
                interactive=False,
            )
            # Download slot for the generated text; hidden until a file exists.
            generated_file = gr.File(
                interactive=False,
                visible=False,
            )
        with gr.Column():
            pass
"""
==================================
B. Logic part
==================================
"""
"""
B.1 Input text
"""
def upload_input_file(file_):
    """
    Callback of the upload button: loads the uploaded file into the input textbox.

    `file_` is the object handed over by the upload button; only its `name`
    attribute (the path of the file) is used. The returned value is the update
    dict produced by `upload_file`.
    """
    uploaded_path = file_.name
    return upload_file(uploaded_path)
def upload_example_file(btn, input_id_):
    """
    Callback of the two example buttons: copies a bundled example into a
    per-id working directory and loads it into the input textbox.

    btn: label of the clicked button; "One task example" selects
        "onetask_example.txt", any other value selects "long_example.txt".
    input_id_: current value of the `input_id` state, passed to
        `control.get_long_id` to obtain the directory name.

    Returns the update dict of `upload_file`, extended with the new value of
    the `input_id` state.
    """
    filename = "onetask_example.txt" if btn == "One task example" else "long_example.txt"
    long_id = control.get_long_id(input_id_)
    target_dir = os.path.join('tmp_input', long_id)
    # makedirs(exist_ok=True) also creates a missing 'tmp_input' parent and
    # does not fail when the directory is already there (e.g. a second click
    # resolving to the same id), both of which made os.mkdir raise.
    os.makedirs(target_dir, exist_ok=True)
    copypath = os.path.join(target_dir, filename)
    shutil.copy(os.path.join("data", filename), copypath)
    update_ = upload_file(copypath)
    # State outputs receive the raw value, as source1_fn does for
    # db_collection_var.
    update_[input_id] = long_id
    return update_
def upload_file(filename):
    """
    Reads a text file and returns the update that puts its content into the
    input textbox.

    filename: path of the file to read (opened in text mode).

    Returns a dict mapping the `input_text` component to its update.
    """
    # The context manager closes the handle, which the previous bare open()
    # never did.
    with open(filename, "r") as f_:
        input_text_ = f_.read()
    update_ = {
        input_text: gr.update(value=input_text_)
    }
    return update_
# Per-session identifier of the input document; -1 until an example is loaded.
input_id = gr.State(-1)

# Uploading a file fills the input textbox with its content.
upload_btn.upload(
    upload_input_file,
    inputs=[upload_btn],
    outputs=[input_text],
)

# Both example buttons share one callback; the button passes itself as an
# input so the callback can tell which one was clicked.
example1.click(
    upload_example_file,
    inputs=[example1, input_id],
    outputs=[input_text, input_id],
)
example2.click(
    upload_example_file,
    inputs=[example2, input_id],
    outputs=[input_text, input_id],
)
"""
--------------------
B.2 Logic for sources
--------------------
"""
def source1_fn(source_, db_collection_):
    """
    Reacts to the source radio: shows the "My sources" panel when the user
    picks "My own sources" and hides it otherwise.

    When the panel is shown, the collection state is also set to the id
    returned by `control.get_long_id(db_collection_)`.
    """
    if source_ != "My own sources":
        return {
            db_col: gr.update(visible=False),
        }

    long_id = control.get_long_id(db_collection_)
    return {
        db_collection_var: long_id,
        db_col: gr.update(visible=True),
    }
def source2_fn(db_collection_):
    """
    Second step of the source selection (chained after `source1_fn`):
    resolves the collection id with `control.get_long_id` and hands it to
    `control.get_or_create_collection`. Nothing is returned to the UI.
    """
    collection_id = control.get_long_id(db_collection_)
    control.get_or_create_collection(collection_id)
def db_reset_fn(wiki_source_, db_collection_):
    """
    Resets the source db and the UI elements that mirror its content.

    wiki_source_: wiki page titles found for the current input text; they all
        become selectable again once the db is empty.
    db_collection_: name of the collection to reset.
    """
    control.reset_collection(control.get_or_create_collection(db_collection_))

    # The "add selection" button is only useful when there are pages to offer.
    has_wiki_candidates = len(wiki_source_) > 0
    return {
        wiki_db_var: [],
        my_files_db_var: [],
        db_reset_btn: gr.update(visible=False),
        db_list_comp: gr.update(value=[], choices=[]),
        wiki_list_comp: gr.update(value=wiki_source_, choices=wiki_source_),
        wiki_add_to_db_btn: gr.update(visible=has_wiki_candidates),
    }
def db_add_doc_fn():
    """
    Opens the panel used to add own files or wiki pages to the source db,
    and hides the button that opened it.
    """
    return {
        db_add_doc_btn: gr.update(visible=False),
        add_col: gr.update(visible=True),
    }
def add_close_fn():
    """
    Closes the panel used to add own files or wiki pages to the source db,
    and shows the "Add new documents" button again.
    """
    return {
        db_add_doc_btn: gr.update(visible=True),
        add_col: gr.update(visible=False),
    }
def wiki_fetch1_fn():
    """
    First step of the wiki search: only makes the wiki page list visible.
    The pages themselves are fetched by the chained `wiki_fetch2_fn`.
    """
    return {
        wiki_list_comp: gr.update(visible=True),
    }
def wiki_fetch2_fn(wiki_db_files_, input_text_):
    """
    Second step of the wiki search: asks `control.wiki_fetch` for the pages
    matching the input text and offers those not yet in the db.

    wiki_db_files_: wiki titles already stored in the db (as seen from the UI).
    input_text_: content of the input textbox.

    The full result of the fetch is kept in the `wiki_source_var` state; the
    checkbox list only shows (pre-selected) the titles that are not in the db.
    """
    fetched_pages = control.wiki_fetch(input_text_)
    new_pages = [page for page in fetched_pages if page not in wiki_db_files_]
    return {
        wiki_list_comp: gr.update(visible=True, value=new_pages, choices=new_pages),
        wiki_add_to_db_btn: gr.update(visible=True),
        wiki_source_var: fetched_pages,
    }
async def wiki_add_to_db_fn(wiki_list_, wiki_source_, wiki_db_, db_list_, db_collection_):
    """
    Adds the selected wiki pages to the db source.

    wiki_list_: titles currently ticked in the wiki checkbox list.
    wiki_source_: all titles found for the input text.
    wiki_db_: wiki titles already in the db; extended in place.
    db_list_: value of the db checkbox list; extended in place.
    db_collection_: name of the collection, forwarded to
        `control.wiki_upload_and_store`.

    The uploads are awaited together before the UI updates are returned.
    """
    new_pages = [page for page in wiki_list_ if page not in wiki_db_]
    db_list_ += new_pages
    wiki_db_ += new_pages

    # Pages still on offer: everything found minus what is now in the db.
    remaining_pages = [page for page in wiki_source_ if page not in wiki_db_]

    uploads = [control.wiki_upload_and_store(page, db_collection_) for page in new_pages]
    await asyncio.gather(*uploads)

    db_has_content = len(db_list_) > 0
    pages_left = len(remaining_pages) > 0
    return {
        wiki_db_var: wiki_db_,
        wiki_list_comp: gr.update(value=remaining_pages, choices=remaining_pages),
        wiki_add_to_db_btn: gr.update(visible=pages_left),
        db_list_comp: gr.update(
            visible=True,
            value=db_list_,
            choices=db_list_,
            label="Database content"),
        db_reset_btn: gr.update(visible=db_has_content),
        generate_btn: gr.update(visible=True, interactive=db_has_content),
    }
def my_files_list_fn(my_files_list_):
    """
    Shows the "Add files to sources" button only while the file widget
    holds at least one file.
    """
    has_files = bool(my_files_list_)
    return {
        my_files_add_to_db_btn: gr.update(visible=has_files)
    }
async def my_files_add_to_db_fn(my_files_list_, my_files_db_, db_list_):
    """
    Adds the uploaded files to the db source.

    my_files_list_: files held by the upload widget (objects with a `name`
        path attribute); may be None or empty.
    my_files_db_: base names of the files already in the db; extended in place.
    db_list_: value of the db checkbox list; extended in place.

    The uploads are awaited together, then the widget is cleared and the db
    list is refreshed.
    NOTE(review): unlike `control.wiki_upload_and_store`, the upload helper
    called here receives no collection name -- confirm this is intended.
    """
    # my_files_db_ stores base names, so the duplicate check must compare base
    # names too; comparing the full upload paths never matched.
    paths_to_add = [
        fi.name
        for fi in (my_files_list_ or [])
        if os.path.basename(fi.name) not in my_files_db_
    ]
    uploads = [control.my_files_upload_and_store(f_name) for f_name in paths_to_add]
    await asyncio.gather(*uploads)

    names_added = [os.path.basename(f_name) for f_name in paths_to_add]
    my_files_db_ += names_added
    if db_list_ is None:
        db_list_ = []
    db_list_ += names_added

    update_ = {
        my_files_list_comp: gr.update(value=None),
        my_files_add_to_db_btn: gr.update(visible=False),
        # State outputs receive the raw value, as in wiki_add_to_db_fn and
        # db_reset_fn.
        my_files_db_var: my_files_db_,
        generate_btn: gr.update(interactive=True),
        db_reset_btn: gr.update(visible=True),
        db_list_comp: gr.update(
            visible=True,
            value=db_list_,
            choices=db_list_,
            label="Database content"),
    }
    return update_
# Session states backing the source logic.
wiki_source_var: gr.State = gr.State([])  # list of wikipage titles of interest for the input text tasks
wiki_db_var: gr.State = gr.State([])  # list of wiki document titles in the db (as seen from the UI)
my_files_db_var: gr.State = gr.State([])  # list of titles of the files uploaded in the db (as seen from the UI)
db_collection_var: gr.State = gr.State(-1)  # name of the collection of documents sources in the db

# Source selection: first toggle the panel, then make sure the collection exists.
(
    source_radio
    .change(source1_fn, inputs=[source_radio, db_collection_var], outputs=[db_col, db_collection_var])
    .then(source2_fn, inputs=[db_collection_var], outputs=[])
)

# Open / close the "add new documents" panel.
db_add_doc_btn.click(db_add_doc_fn, inputs=[], outputs=[db_add_doc_btn, add_col])
add_close_btn.click(add_close_fn, inputs=[], outputs=[db_add_doc_btn, add_col])

# Wiki search: reveal the list first, then fill it with the fetched pages.
(
    wiki_fetch_btn
    .click(wiki_fetch1_fn, inputs=[], outputs=[wiki_list_comp])
    .then(
        wiki_fetch2_fn,
        inputs=[wiki_db_var, input_text],
        outputs=[wiki_list_comp, wiki_source_var, wiki_add_to_db_btn],
    )
)

# Add the ticked wiki pages to the db.
wiki_add_to_db_btn.click(
    wiki_add_to_db_fn,
    inputs=[wiki_list_comp, wiki_source_var, wiki_db_var, db_list_comp, db_collection_var],
    outputs=[db_list_comp, wiki_list_comp, wiki_db_var,
             generate_btn, wiki_add_to_db_btn, db_reset_btn],
)

# Own files: show the "add" button when files are present, then store them.
my_files_list_comp.change(
    my_files_list_fn,
    inputs=[my_files_list_comp],
    outputs=[my_files_add_to_db_btn],
)
my_files_add_to_db_btn.click(
    my_files_add_to_db_fn,
    inputs=[my_files_list_comp, my_files_db_var, db_list_comp],
    outputs=[my_files_add_to_db_btn, my_files_list_comp, my_files_db_var,
             db_reset_btn, generate_btn, db_list_comp],
)

# Reset the db and the UI elements mirroring it.
db_reset_btn.click(
    db_reset_fn,
    inputs=[wiki_source_var, db_collection_var],
    outputs=[wiki_db_var, my_files_db_var, db_list_comp, db_reset_btn,
             db_add_doc_btn, wiki_list_comp, wiki_add_to_db_btn],
)
"""
--------------------
B.3 Logic for generation
--------------------
"""
def generate_fn(input_text_, source_, db_collection_, db_list_):
    """
    Generates the final text starting from the input text and the source:
    either "public" or private = from documents stored in the collection in
    the db.

    input_text_: content of the input textbox.
    source_: value of the source radio; "Unknown sources" selects
        `control.generate_doc_from_gpt`, anything else selects
        `control.generate_doc_from_db`.
    db_collection_: name of the collection, used for db generation only.
    db_list_: documents ticked in the db list, used for db generation only.

    Returns the updates for the output textbox and the downloadable file.
    """
    # Generate first, so that a failing generation leaves no directory or
    # file behind.
    if source_ == "Unknown sources":
        output_text_ = control.generate_doc_from_gpt(input_text_)
    else:
        output_text_ = control.generate_doc_from_db(input_txt=input_text_,
                                                    collection_name=db_collection_,
                                                    from_files=db_list_)

    # mkdtemp guarantees a fresh directory; the former random name out of
    # 1000 values made os.mkdir fail as soon as a number was drawn twice.
    # dir="." keeps the output under the working directory, as before.
    out_dir = tempfile.mkdtemp(dir=".")
    fpath = os.path.join(out_dir, "generated_text.txt")
    # Closing the file flushes the text to disk before the UI serves it.
    with open(fpath, "w") as f_:
        f_.write(output_text_)

    update_ = {
        output_text: gr.update(value=output_text_),
        generated_file: gr.update(visible=True, value=fpath),
    }
    return update_
# The "Generate" button feeds the plan, the source choice and the selected
# documents to generate_fn and displays the resulting text and file.
generate_btn.click(
    generate_fn,
    inputs=[input_text, source_radio, db_collection_var, db_list_comp],
    outputs=[output_text, generated_file],
)
"""
==================================
Launch
==================================
"""
# Enable the request queue on the app, then start serving it.
queued_app = gptdoc.queue()
queued_app.launch()