initial load
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- 81/generated_text.txt +7 -0
- README.md +4 -4
- __pycache__/test_input_file.cpython-311-pytest-7.3.1.pyc +0 -0
- app.py +374 -0
- data/list +20 -0
- data/long_example.txt +21 -0
- data/onetask_example.txt +7 -0
- data/usage.txt +13 -0
- requirements.txt +111 -0
- src/control/__pycache__/control.cpython-311.pyc +0 -0
- src/control/control.py +93 -0
- src/model/__pycache__/document.cpython-311.pyc +0 -0
- src/model/document.py +176 -0
- src/model/model.py +60 -0
- src/tools/__pycache__/llm_tools.cpython-311.pyc +0 -0
- src/tools/__pycache__/llms.cpython-311.pyc +0 -0
- src/tools/__pycache__/semantic_db.cpython-311.pyc +0 -0
- src/tools/__pycache__/wiki.cpython-311.pyc +0 -0
- src/tools/llm_tools.py +207 -0
- src/tools/llms.py +20 -0
- src/tools/semantic_db.py +60 -0
- src/tools/wiki.py +61 -0
- tests/.chroma/index/id_to_uuid_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl +3 -0
- tests/.chroma/index/id_to_uuid_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl +3 -0
- tests/.chroma/index/id_to_uuid_37825327-eef6-4255-92ac-787c21197d77.pkl +3 -0
- tests/.chroma/index/id_to_uuid_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl +3 -0
- tests/.chroma/index/id_to_uuid_46204504-325f-47e6-9176-e2054080ad57.pkl +3 -0
- tests/.chroma/index/id_to_uuid_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl +3 -0
- tests/.chroma/index/id_to_uuid_69550299-be81-45fa-8bbf-3d83be2d7991.pkl +3 -0
- tests/.chroma/index/id_to_uuid_78f80853-f999-4f5e-b320-41c98bd28592.pkl +3 -0
- tests/.chroma/index/id_to_uuid_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl +3 -0
- tests/.chroma/index/id_to_uuid_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl +3 -0
- tests/.chroma/index/id_to_uuid_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl +3 -0
- tests/.chroma/index/id_to_uuid_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl +3 -0
- tests/.chroma/index/id_to_uuid_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl +3 -0
- tests/.chroma/index/id_to_uuid_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl +3 -0
- tests/.chroma/index/id_to_uuid_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl +3 -0
- tests/.chroma/index/id_to_uuid_bf57b36f-a918-4484-b897-79f751d5cad4.pkl +3 -0
- tests/.chroma/index/id_to_uuid_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl +3 -0
- tests/.chroma/index/id_to_uuid_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl +3 -0
- tests/.chroma/index/index_0c55a091-9f95-4a8d-b868-83d95412fdc4.bin +3 -0
- tests/.chroma/index/index_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.bin +3 -0
- tests/.chroma/index/index_37825327-eef6-4255-92ac-787c21197d77.bin +3 -0
- tests/.chroma/index/index_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.bin +3 -0
- tests/.chroma/index/index_46204504-325f-47e6-9176-e2054080ad57.bin +3 -0
- tests/.chroma/index/index_64afc7c0-c153-47d1-af52-55e1738ae76c.bin +3 -0
- tests/.chroma/index/index_69550299-be81-45fa-8bbf-3d83be2d7991.bin +3 -0
- tests/.chroma/index/index_78f80853-f999-4f5e-b320-41c98bd28592.bin +3 -0
- tests/.chroma/index/index_90d6076c-bb50-40ed-90a1-2df2243fd12e.bin +3 -0
- tests/.chroma/index/index_a10bf13e-424a-41cd-bcfb-27d8072711ea.bin +3 -0
81/generated_text.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
!! terrorism in Italy in the years 70 and 80
|
| 2 |
+
|
| 3 |
+
# what happened
|
| 4 |
+
|
| 5 |
+
## the killing of Aldo Moro
|
| 6 |
+
Aldo Moro was an Italian statesman and a prominent member of the Christian Democracy party. He was kidnapped and killed by the Red Brigades, a left-wing terrorist group, in 1978. The Red Brigades wanted to destabilize the Italian government and force the release of their imprisoned members. They believed that Moro was the key to achieving their goals, and so they targeted him for assassination. Moro's death was a major blow to the Italian government and to the Christian Democracy party, and it marked the beginning of a period of increased terrorism in Italy. The Red Brigades were eventually disbanded, but the legacy of Moro's death still lingers in Italy today.
|
| 7 |
+
|
README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 3.32.0
|
| 8 |
app_file: app.py
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Gendoc
|
| 3 |
+
emoji: 🦀
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 3.32.0
|
| 8 |
app_file: app.py
|
__pycache__/test_input_file.cpython-311-pytest-7.3.1.pyc
ADDED
|
Binary file (7.48 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os.path
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import numpy as np
|
| 4 |
+
import asyncio
|
| 5 |
+
import shutil
|
| 6 |
+
|
| 7 |
+
import src.control.control as control
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
"""
|
| 11 |
+
==================================
|
| 12 |
+
A. Component part
|
| 13 |
+
==================================
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
with gr.Blocks() as docgpt:
|
| 17 |
+
with gr.Row():
|
| 18 |
+
|
| 19 |
+
with gr.Column():
|
| 20 |
+
pass
|
| 21 |
+
|
| 22 |
+
with gr.Column(scale=10):
|
| 23 |
+
"""
|
| 24 |
+
1. input docs components
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
gr.Markdown("# 1. Define the plan of your document")
|
| 28 |
+
|
| 29 |
+
f = open('data/usage.txt', 'r')
|
| 30 |
+
usage = f.read()
|
| 31 |
+
|
| 32 |
+
input_text = gr.Textbox(
|
| 33 |
+
label="enter your text",
|
| 34 |
+
lines=25,
|
| 35 |
+
max_lines=25,
|
| 36 |
+
interactive=True,
|
| 37 |
+
elem_classes="selected_",
|
| 38 |
+
placeholder=usage,
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
with gr.Row():
|
| 42 |
+
upload_btn = gr.UploadButton(type='file')
|
| 43 |
+
example1 = gr.Button("One task example")
|
| 44 |
+
example2 = gr.Button("Several tasks example")
|
| 45 |
+
|
| 46 |
+
"""
|
| 47 |
+
2. source components
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
gr.Markdown("# 2. Choose the sources for the document generation")
|
| 51 |
+
|
| 52 |
+
with gr.Column(visible=True, variant='panel') as select_col:
|
| 53 |
+
gr.Markdown("### Select the sources")
|
| 54 |
+
source_radio = gr.Radio(
|
| 55 |
+
choices=["Unknown sources", "My own sources"],
|
| 56 |
+
label="",
|
| 57 |
+
value="Unknown sources",
|
| 58 |
+
visible=True,
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
with gr.Column(visible=False, variant='panel') as db_col:
|
| 62 |
+
gr.Markdown("### My sources")
|
| 63 |
+
db_list_comp = gr.CheckboxGroup(
|
| 64 |
+
label="Current content",
|
| 65 |
+
info="These documents are currently your sources. Unselect the documents you don't want to be taken"
|
| 66 |
+
"into account when generating the document",
|
| 67 |
+
visible=True,
|
| 68 |
+
interactive=True,
|
| 69 |
+
)
|
| 70 |
+
with gr.Row():
|
| 71 |
+
db_reset_btn = gr.Button("Reset the sources", visible=False).style(full_width=False, size="sm")
|
| 72 |
+
db_add_doc_btn = gr.Button("Add new documents", visible=True).style(full_width=False, size="sm")
|
| 73 |
+
|
| 74 |
+
with gr.Column(visible=False, variant="panel") as add_col:
|
| 75 |
+
gr.Markdown("### Add new documents ")
|
| 76 |
+
|
| 77 |
+
with gr.Tab("From Wikipedia"):
|
| 78 |
+
wiki_fetch_btn = gr.Button("Search for Wikipedia pages", visible=True)
|
| 79 |
+
wiki_fetch_btn.style(full_width=False, size="sm")
|
| 80 |
+
wiki_list_comp = gr.CheckboxGroup(
|
| 81 |
+
label="Select the wiki pages",
|
| 82 |
+
info="The selected pages can be added to sources",
|
| 83 |
+
visible=False,
|
| 84 |
+
interactive=True,
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
wiki_add_to_db_btn = gr.Button("Add selection to sources", visible=False)
|
| 88 |
+
wiki_add_to_db_btn.style(full_width=False, size="sm")
|
| 89 |
+
|
| 90 |
+
with gr.Tab("From disk"):
|
| 91 |
+
my_files_list_comp = gr.Files(
|
| 92 |
+
label="Upload own documents",
|
| 93 |
+
info="Your selected documents provide the content for generating the output document",
|
| 94 |
+
visible=True,
|
| 95 |
+
)
|
| 96 |
+
my_files_add_to_db_btn = gr.Button("Add files to sources", visible=False)
|
| 97 |
+
my_files_add_to_db_btn.style(full_width=False, size="sm")
|
| 98 |
+
|
| 99 |
+
add_close_btn = gr.Button("Close").style(size='sm', full_width=False)
|
| 100 |
+
|
| 101 |
+
"""
|
| 102 |
+
3. Generate (and inspect the document)
|
| 103 |
+
"""
|
| 104 |
+
|
| 105 |
+
gr.Markdown("# 3. Generate the document")
|
| 106 |
+
|
| 107 |
+
generate_btn = gr.Button("Generate", interactive=True)
|
| 108 |
+
|
| 109 |
+
output_text = gr.Textbox(
|
| 110 |
+
label="Generated document",
|
| 111 |
+
value="",
|
| 112 |
+
lines=25,
|
| 113 |
+
max_lines=25,
|
| 114 |
+
interactive=False,
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
generated_file = gr.File(
|
| 118 |
+
interactive=False,
|
| 119 |
+
visible=False,
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
with gr.Column():
|
| 123 |
+
pass
|
| 124 |
+
|
| 125 |
+
"""
|
| 126 |
+
==================================
|
| 127 |
+
B. Logic part
|
| 128 |
+
==================================
|
| 129 |
+
"""
|
| 130 |
+
|
| 131 |
+
"""
|
| 132 |
+
B.1 Input text
|
| 133 |
+
"""
|
| 134 |
+
def upload_input_file(file_):
|
| 135 |
+
return upload_file(file_.name)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def upload_example_file(btn, input_id_):
|
| 139 |
+
filename = "onetask_example.txt" if btn == "One task example" else "long_example.txt"
|
| 140 |
+
long_id = control.get_long_id(input_id_)
|
| 141 |
+
os.mkdir('tmp_input/' + long_id)
|
| 142 |
+
copypath = 'tmp_input/' + long_id + '/' + filename
|
| 143 |
+
shutil.copy("data/" + filename, copypath)
|
| 144 |
+
update_ = upload_file(copypath)
|
| 145 |
+
update_[input_id] = gr.update(value=long_id)
|
| 146 |
+
return update_
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def upload_file(filename):
|
| 150 |
+
f_ = open(filename, "r")
|
| 151 |
+
input_text_ = f_.read()
|
| 152 |
+
update_ = {
|
| 153 |
+
input_text: gr.update(value=input_text_)
|
| 154 |
+
}
|
| 155 |
+
return update_
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
input_id = gr.State(-1)
|
| 159 |
+
|
| 160 |
+
upload_btn.upload(upload_input_file, inputs=[upload_btn], outputs=[input_text])
|
| 161 |
+
example1.click(upload_example_file, inputs=[example1, input_id], outputs=[input_text, input_id])
|
| 162 |
+
example2.click(upload_example_file, inputs=[example2, input_id], outputs=[input_text, input_id])
|
| 163 |
+
|
| 164 |
+
"""
|
| 165 |
+
--------------------
|
| 166 |
+
B.2 Logic for sources
|
| 167 |
+
--------------------
|
| 168 |
+
"""
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def source_fn(source_, db_collection_):
|
| 172 |
+
"""
|
| 173 |
+
Allows to choose the sources for the doc generation
|
| 174 |
+
"""
|
| 175 |
+
if source_ == "My own sources":
|
| 176 |
+
long_id = control.get_long_id(db_collection_)
|
| 177 |
+
control.get_or_create_collection(long_id)
|
| 178 |
+
update_ = {
|
| 179 |
+
db_col: gr.update(visible=True),
|
| 180 |
+
db_collection_var: long_id,
|
| 181 |
+
}
|
| 182 |
+
else:
|
| 183 |
+
update_ = {
|
| 184 |
+
db_col: gr.update(visible=False),
|
| 185 |
+
}
|
| 186 |
+
return update_
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def db_reset_fn(wiki_source_, db_collection_):
|
| 190 |
+
"""
|
| 191 |
+
resets the source db
|
| 192 |
+
"""
|
| 193 |
+
coll = control.get_or_create_collection(db_collection_)
|
| 194 |
+
control.reset_collection(coll)
|
| 195 |
+
wiki_to_add_not_empty = 0 < len(wiki_source_)
|
| 196 |
+
update_ = {
|
| 197 |
+
wiki_db_var: [],
|
| 198 |
+
my_files_db_var: [],
|
| 199 |
+
db_reset_btn: gr.update(visible=False),
|
| 200 |
+
db_list_comp: gr.update(value=[], choices=[]),
|
| 201 |
+
wiki_list_comp: gr.update(value=wiki_source_, choices=wiki_source_),
|
| 202 |
+
wiki_add_to_db_btn: gr.update(visible=wiki_to_add_not_empty),
|
| 203 |
+
}
|
| 204 |
+
return update_
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def db_add_doc_fn():
|
| 208 |
+
"""
|
| 209 |
+
opens the component which allows to add new own files or wiki to the source db
|
| 210 |
+
"""
|
| 211 |
+
update_ = {
|
| 212 |
+
db_add_doc_btn: gr.update(visible=False),
|
| 213 |
+
add_col: gr.update(visible=True),
|
| 214 |
+
}
|
| 215 |
+
return update_
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def add_close_fn():
|
| 219 |
+
"""
|
| 220 |
+
close the component which allows to add new own files or wiki to the source db
|
| 221 |
+
"""
|
| 222 |
+
update_ = {
|
| 223 |
+
db_add_doc_btn: gr.update(visible=True),
|
| 224 |
+
add_col: gr.update(visible=False),
|
| 225 |
+
}
|
| 226 |
+
return update_
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def wiki_fetch_fn(wiki_db_files_, input_text_):
|
| 230 |
+
"""
|
| 231 |
+
fetch the wikifiles interesting for solving the tasks as defined in the input doc
|
| 232 |
+
"""
|
| 233 |
+
wiki_interesting_files = control.wiki_fetch(input_text_)
|
| 234 |
+
wiki_files = [wiki for wiki in wiki_interesting_files if wiki not in wiki_db_files_]
|
| 235 |
+
update_ = {
|
| 236 |
+
wiki_list_comp: gr.update(visible=True, value=wiki_files, choices=wiki_files),
|
| 237 |
+
wiki_add_to_db_btn: gr.update(visible=True),
|
| 238 |
+
wiki_source_var: wiki_interesting_files,
|
| 239 |
+
}
|
| 240 |
+
return update_
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
async def wiki_add_to_db_fn(wiki_list_, wiki_source_, wiki_db_, db_list_, db_collection_):
|
| 244 |
+
"""
|
| 245 |
+
adds the wikipages to the db source
|
| 246 |
+
"""
|
| 247 |
+
wiki_to_add = [wiki for wiki in wiki_list_ if wiki not in wiki_db_]
|
| 248 |
+
db_list_ += wiki_to_add
|
| 249 |
+
wiki_db_ += wiki_to_add
|
| 250 |
+
wiki_source_remaining = [wiki for wiki in wiki_source_ if wiki not in wiki_db_]
|
| 251 |
+
tasks = [control.wiki_upload_and_store(wiki, db_collection_) for wiki in wiki_to_add]
|
| 252 |
+
await asyncio.gather(*tasks)
|
| 253 |
+
db_not_empty = 0 < len(db_list_)
|
| 254 |
+
wiki_to_add_not_empty = 0 < len(wiki_source_remaining)
|
| 255 |
+
update_ = {
|
| 256 |
+
wiki_db_var: wiki_db_,
|
| 257 |
+
wiki_list_comp: gr.update(value=wiki_source_remaining, choices=wiki_source_remaining),
|
| 258 |
+
wiki_add_to_db_btn: gr.update(visible=wiki_to_add_not_empty),
|
| 259 |
+
db_list_comp: gr.update(
|
| 260 |
+
visible=True,
|
| 261 |
+
value=db_list_,
|
| 262 |
+
choices=db_list_,
|
| 263 |
+
label="Database content"),
|
| 264 |
+
db_reset_btn: gr.update(visible=db_not_empty),
|
| 265 |
+
generate_btn: gr.update(visible=True, interactive=db_not_empty),
|
| 266 |
+
}
|
| 267 |
+
return update_
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def my_files_list_fn(my_files_list_):
|
| 271 |
+
|
| 272 |
+
update_ = {
|
| 273 |
+
my_files_add_to_db_btn: gr.update(visible=bool(my_files_list_))
|
| 274 |
+
}
|
| 275 |
+
return update_
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
async def my_files_add_to_db_fn(my_files_list_, my_files_db_, db_list_):
|
| 279 |
+
"""
|
| 280 |
+
adds the files to the db source
|
| 281 |
+
"""
|
| 282 |
+
my_files_to_add = [fi.name for fi in my_files_list_ if fi.name not in my_files_db_]
|
| 283 |
+
tasks = [control.my_files_upload_and_store(f_name) for f_name in my_files_to_add]
|
| 284 |
+
await asyncio.gather(*tasks)
|
| 285 |
+
my_files_to_add = [os.path.basename(f_name) for f_name in my_files_to_add]
|
| 286 |
+
my_files_db_ += my_files_to_add
|
| 287 |
+
db_list_ += my_files_to_add
|
| 288 |
+
update_ = {
|
| 289 |
+
my_files_list_comp: gr.update(value=None),
|
| 290 |
+
my_files_add_to_db_btn: gr.update(visible=False),
|
| 291 |
+
my_files_db_var: gr.update(value=my_files_db_),
|
| 292 |
+
generate_btn: gr.update(interactive=True),
|
| 293 |
+
db_reset_btn: gr.update(visible=True),
|
| 294 |
+
db_list_comp: gr.update(
|
| 295 |
+
visible=True,
|
| 296 |
+
value=db_list_,
|
| 297 |
+
choices=db_list_,
|
| 298 |
+
label="Database content"),
|
| 299 |
+
}
|
| 300 |
+
return update_
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
wiki_source_var: [str] = gr.State([]) # list of wikipage titles of interest for the input text tasks
|
| 304 |
+
wiki_db_var: [str] = gr.State([]) # list of wiki document titles in the db (as seen from the UI)
|
| 305 |
+
my_files_db_var: [str] = gr.State([]) # list of titles of the files uploaded in the db (as seen from the UI)
|
| 306 |
+
db_collection_var: str = gr.State(-1) # name of the collection of documents sources in the db
|
| 307 |
+
|
| 308 |
+
source_radio.change(source_fn, inputs=[source_radio, db_collection_var], outputs=[db_col, db_collection_var])
|
| 309 |
+
db_add_doc_btn.click(db_add_doc_fn, inputs=[], outputs=[db_add_doc_btn, add_col])
|
| 310 |
+
add_close_btn.click(add_close_fn, inputs=[], outputs=[db_add_doc_btn, add_col])
|
| 311 |
+
|
| 312 |
+
wiki_fetch_btn.click(wiki_fetch_fn,
|
| 313 |
+
inputs=[wiki_db_var, input_text],
|
| 314 |
+
outputs=[wiki_list_comp, wiki_source_var, wiki_add_to_db_btn])
|
| 315 |
+
wiki_add_to_db_btn.click(wiki_add_to_db_fn,
|
| 316 |
+
inputs=[wiki_list_comp, wiki_source_var, wiki_db_var, db_list_comp, db_collection_var],
|
| 317 |
+
outputs=[db_list_comp, wiki_list_comp, wiki_db_var,
|
| 318 |
+
generate_btn, wiki_add_to_db_btn, db_reset_btn])
|
| 319 |
+
|
| 320 |
+
my_files_list_comp.change(my_files_list_fn, inputs=[my_files_list_comp], outputs=[my_files_add_to_db_btn])
|
| 321 |
+
my_files_add_to_db_btn.click(my_files_add_to_db_fn,
|
| 322 |
+
inputs=[my_files_list_comp, my_files_db_var, db_list_comp],
|
| 323 |
+
outputs=[my_files_add_to_db_btn, my_files_list_comp, my_files_db_var,
|
| 324 |
+
db_reset_btn, generate_btn, db_list_comp])
|
| 325 |
+
db_reset_btn.click(db_reset_fn,
|
| 326 |
+
inputs=[wiki_source_var, db_collection_var],
|
| 327 |
+
outputs=[wiki_db_var, my_files_db_var, db_list_comp, db_reset_btn,
|
| 328 |
+
db_add_doc_btn, wiki_list_comp, wiki_add_to_db_btn])
|
| 329 |
+
|
| 330 |
+
"""
|
| 331 |
+
--------------------
|
| 332 |
+
B.3 Logic for generation
|
| 333 |
+
--------------------
|
| 334 |
+
"""
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def generate_fn(input_text_, source_, db_collection_, db_list_):
|
| 338 |
+
"""
|
| 339 |
+
generates the final text starting from the input text and the source : either "public" or private = from
|
| 340 |
+
documents stored in the collection in the db
|
| 341 |
+
"""
|
| 342 |
+
rand_dir_path = "./" + str(np.random.randint(1000))
|
| 343 |
+
os.mkdir(rand_dir_path)
|
| 344 |
+
fpath = rand_dir_path + "/generated_text.txt"
|
| 345 |
+
f_ = open(fpath, "w")
|
| 346 |
+
|
| 347 |
+
if source_ == "Unknown sources":
|
| 348 |
+
output_text_ = control.generate_doc_from_gpt(input_text_)
|
| 349 |
+
else:
|
| 350 |
+
coll = db_collection_
|
| 351 |
+
output_text_ = control.generate_doc_from_db(input_txt=input_text_,
|
| 352 |
+
collection_name=coll,
|
| 353 |
+
from_files=db_list_)
|
| 354 |
+
f_.write(output_text_)
|
| 355 |
+
f_.seek(0)
|
| 356 |
+
|
| 357 |
+
update_ = {
|
| 358 |
+
output_text: gr.update(value=output_text_),
|
| 359 |
+
generated_file: gr.update(visible=True, value=f_.name),
|
| 360 |
+
}
|
| 361 |
+
return update_
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
generate_btn.click(generate_fn,
|
| 365 |
+
inputs=[input_text, source_radio, db_collection_var, db_list_comp],
|
| 366 |
+
outputs=[output_text, generated_file])
|
| 367 |
+
|
| 368 |
+
"""
|
| 369 |
+
==================================
|
| 370 |
+
Launch
|
| 371 |
+
==================================
|
| 372 |
+
"""
|
| 373 |
+
|
| 374 |
+
docgpt.queue().launch()
|
data/list
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"Years of Lead (Italy)",
|
| 3 |
+
"Terrorism in Italy",
|
| 4 |
+
"Red Brigades",
|
| 5 |
+
"Ordine Nuovo",
|
| 6 |
+
"Years of Lead (Italy)",
|
| 7 |
+
"Cold War",
|
| 8 |
+
"Terrorism in Europe",
|
| 9 |
+
"Palestinian terrorism"
|
| 10 |
+
]
|
| 11 |
+
|
| 12 |
+
prompt = f"""
|
| 13 |
+
Your task is to identify the title of relevant wikipedia pages which would be helpful \
|
| 14 |
+
to expand on this text.
|
| 15 |
+
|
| 16 |
+
Give the page titles in the form of a JSON list, the text is delimited by triple \
|
| 17 |
+
backticks.
|
| 18 |
+
|
| 19 |
+
Text: ```{text}```
|
| 20 |
+
"""
|
data/long_example.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
!! terrorism in Italy in the years 70 and 80
|
| 2 |
+
|
| 3 |
+
# what happened
|
| 4 |
+
++ describe the facts in Italy
|
| 5 |
+
It is a fact , that Italy undergone several acts of terrorism in the 70s ans 80s
|
| 6 |
+
|
| 7 |
+
## summary of all events linked to terrorism
|
| 8 |
+
?? summarize terrorism events from 70 to 90 in Italy (around 100 words)
|
| 9 |
+
|
| 10 |
+
## the major events
|
| 11 |
+
?? identify several events and describe no more than 5 events (around 50 words per event)
|
| 12 |
+
## the major organisations
|
| 13 |
+
?? identify major organisations (political parties, terrorists groups, etc.) and key individuals
|
| 14 |
+
# the global context
|
| 15 |
+
++ give some context outside of Italy
|
| 16 |
+
## a specific period during the cold war between USSR and the USA
|
| 17 |
+
?? describe the specificities of the relationship between the US and USSR (around 100 words)
|
| 18 |
+
## the terrorism in the 70-80s in Europe
|
| 19 |
+
?? identify terrorism facts in the rest of Europe (e.g. Germany, France, Belgium) (around 50 words per fact)
|
| 20 |
+
## Palestinian terrorism in the 70s 80s
|
| 21 |
+
?? give some infos on Palestinian terrorism facts: acts, organisation and key individuals (around 100 words)
|
data/onetask_example.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
!! terrorism in Italy in the years 70 and 80
|
| 2 |
+
|
| 3 |
+
# what happened
|
| 4 |
+
|
| 5 |
+
## the killing of Aldo Moro
|
| 6 |
+
?? who killed Aldo Moro and why? (around 100 words)
|
| 7 |
+
|
data/usage.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
!! Title
|
| 2 |
+
|
| 3 |
+
# Heading level 1
|
| 4 |
+
## Heading level 2
|
| 5 |
+
### Heading level 3
|
| 6 |
+
#### and so on ...
|
| 7 |
+
|
| 8 |
+
?? Description of the paragraph to be generated
|
| 9 |
+
|
| 10 |
+
++ Comment: adds additional context for the text generator
|
| 11 |
+
|
| 12 |
+
normal text: it is taken into account by the text generator but remains as is in the generated document
|
| 13 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiofiles==23.1.0
|
| 2 |
+
aiohttp==3.8.4
|
| 3 |
+
aiosignal==1.3.1
|
| 4 |
+
altair==4.2.2
|
| 5 |
+
anyio==3.6.2
|
| 6 |
+
async-timeout==4.0.2
|
| 7 |
+
attrs==23.1.0
|
| 8 |
+
backoff==2.2.1
|
| 9 |
+
beautifulsoup4==4.12.2
|
| 10 |
+
cachetools==5.3.0
|
| 11 |
+
certifi==2022.12.7
|
| 12 |
+
charset-normalizer==3.1.0
|
| 13 |
+
chromadb==0.3.21
|
| 14 |
+
click==8.1.3
|
| 15 |
+
clickhouse-connect==0.5.20
|
| 16 |
+
contourpy==1.0.7
|
| 17 |
+
cycler==0.11.0
|
| 18 |
+
dataclasses-json==0.5.7
|
| 19 |
+
duckdb==0.7.1
|
| 20 |
+
entrypoints==0.4
|
| 21 |
+
fastapi==0.95.1
|
| 22 |
+
ffmpy==0.3.0
|
| 23 |
+
filelock==3.11.0
|
| 24 |
+
fonttools==4.39.3
|
| 25 |
+
frozenlist==1.3.3
|
| 26 |
+
fsspec==2023.4.0
|
| 27 |
+
google-search-results==2.4.2
|
| 28 |
+
gptcache==0.1.12
|
| 29 |
+
gradio==3.27.0
|
| 30 |
+
gradio_client==0.1.3
|
| 31 |
+
h11==0.14.0
|
| 32 |
+
hnswlib==0.7.0
|
| 33 |
+
httpcore==0.17.0
|
| 34 |
+
httptools==0.5.0
|
| 35 |
+
httpx==0.24.0
|
| 36 |
+
huggingface-hub==0.13.4
|
| 37 |
+
idna==3.4
|
| 38 |
+
iniconfig==2.0.0
|
| 39 |
+
Jinja2==3.1.2
|
| 40 |
+
joblib==1.2.0
|
| 41 |
+
jsonschema==4.17.3
|
| 42 |
+
kiwisolver==1.4.4
|
| 43 |
+
langchain==0.0.141
|
| 44 |
+
linkify-it-py==2.0.0
|
| 45 |
+
lz4==4.3.2
|
| 46 |
+
markdown-it-py==2.2.0
|
| 47 |
+
MarkupSafe==2.1.2
|
| 48 |
+
marshmallow==3.19.0
|
| 49 |
+
marshmallow-enum==1.5.1
|
| 50 |
+
matplotlib==3.7.1
|
| 51 |
+
mdit-py-plugins==0.3.3
|
| 52 |
+
mdurl==0.1.2
|
| 53 |
+
monotonic==1.6
|
| 54 |
+
mpmath==1.3.0
|
| 55 |
+
multidict==6.0.4
|
| 56 |
+
mypy-extensions==1.0.0
|
| 57 |
+
networkx==3.1
|
| 58 |
+
nltk==3.8.1
|
| 59 |
+
numpy==1.24.2
|
| 60 |
+
openai==0.27.4
|
| 61 |
+
openapi-schema-pydantic==1.2.4
|
| 62 |
+
orjson==3.8.10
|
| 63 |
+
packaging==23.1
|
| 64 |
+
pandas==2.0.0
|
| 65 |
+
Pillow==9.5.0
|
| 66 |
+
pluggy==1.0.0
|
| 67 |
+
posthog==3.0.0
|
| 68 |
+
pydantic==1.10.7
|
| 69 |
+
pydub==0.25.1
|
| 70 |
+
pyparsing==3.0.9
|
| 71 |
+
pyrsistent==0.19.3
|
| 72 |
+
pytest==7.3.1
|
| 73 |
+
python-dateutil==2.8.2
|
| 74 |
+
python-dotenv==1.0.0
|
| 75 |
+
python-multipart==0.0.6
|
| 76 |
+
pytz==2023.3
|
| 77 |
+
PyYAML==6.0
|
| 78 |
+
regex==2023.3.23
|
| 79 |
+
requests==2.28.2
|
| 80 |
+
scikit-learn==1.2.2
|
| 81 |
+
scipy==1.10.1
|
| 82 |
+
semantic-version==2.10.0
|
| 83 |
+
sentence-transformers==2.2.2
|
| 84 |
+
sentencepiece==0.1.98
|
| 85 |
+
six==1.16.0
|
| 86 |
+
sniffio==1.3.0
|
| 87 |
+
soupsieve==2.4.1
|
| 88 |
+
SQLAlchemy==1.4.47
|
| 89 |
+
starlette==0.26.1
|
| 90 |
+
sympy==1.11.1
|
| 91 |
+
tenacity==8.2.2
|
| 92 |
+
threadpoolctl==3.1.0
|
| 93 |
+
tokenizers==0.13.3
|
| 94 |
+
toolz==0.12.0
|
| 95 |
+
torch==2.0.0
|
| 96 |
+
torchvision==0.15.1
|
| 97 |
+
tqdm==4.65.0
|
| 98 |
+
transformers==4.28.1
|
| 99 |
+
typing-inspect==0.8.0
|
| 100 |
+
typing_extensions==4.5.0
|
| 101 |
+
tzdata==2023.3
|
| 102 |
+
uc-micro-py==1.0.1
|
| 103 |
+
urllib3==1.26.15
|
| 104 |
+
uvicorn==0.21.1
|
| 105 |
+
uvloop==0.17.0
|
| 106 |
+
watchfiles==0.19.0
|
| 107 |
+
websockets==11.0.2
|
| 108 |
+
wget==3.2
|
| 109 |
+
wikipedia==1.4.0
|
| 110 |
+
yarl==1.8.2
|
| 111 |
+
zstandard==0.21.0
|
src/control/__pycache__/control.cpython-311.pyc
ADDED
|
Binary file (5.91 kB). View file
|
|
|
src/control/control.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import asyncio # on va en avoir besoin :)
|
| 3 |
+
import string
|
| 4 |
+
import random
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
from src.tools.semantic_db import get_or_create_collection, reset_collection
|
| 9 |
+
from src.tools.wiki import Wiki
|
| 10 |
+
from src.model.document import InputDoc, WikiPage
|
| 11 |
+
from src.tools.llm_tools import get_wikilist, get_public_paragraph, get_private_paragraph
|
| 12 |
+
from src.tools.semantic_db import add_texts_to_collection, query_collection
|
| 13 |
+
|
| 14 |
+
"""
|
| 15 |
+
Tools
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_long_id(id_):
|
| 20 |
+
if id_ != -1:
|
| 21 |
+
return id_
|
| 22 |
+
else:
|
| 23 |
+
now = datetime.now().strftime("%m%d%H%M")
|
| 24 |
+
letters = string.ascii_lowercase + string.digits
|
| 25 |
+
long_id = now+'-'+''.join(random.choice(letters) for _ in range(10))
|
| 26 |
+
return long_id
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
"""
|
| 30 |
+
Input control
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
"""
|
| 36 |
+
Source Control
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
def wiki_fetch(input_text: str) -> [str]:
|
| 40 |
+
"""
|
| 41 |
+
returns the title of the wikipages corresponding to the tasks described in the input text
|
| 42 |
+
"""
|
| 43 |
+
tasks = InputDoc(input_text).tasks
|
| 44 |
+
wiki_lists = [get_wikilist(t) for t in tasks]
|
| 45 |
+
flatten_wiki_list = list(set().union(*[set(w) for w in wiki_lists]))
|
| 46 |
+
return flatten_wiki_list
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
async def wiki_upload_and_store(wiki_title: str, collection_name: str):
|
| 50 |
+
"""
|
| 51 |
+
uploads one wikipage and stores them into the right collection
|
| 52 |
+
"""
|
| 53 |
+
wikipage = Wiki().fetch(wiki_title)
|
| 54 |
+
wiki_title = wiki_title
|
| 55 |
+
if type(wikipage) != str:
|
| 56 |
+
texts = WikiPage(wikipage.page_content).get_paragraphs()
|
| 57 |
+
add_texts_to_collection(coll_name=collection_name, texts=texts, file=wiki_title, source='wiki')
|
| 58 |
+
else:
|
| 59 |
+
print(wikipage)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
async def my_files_upload_and_store(title: str, collection_name: str):
|
| 63 |
+
doc = title
|
| 64 |
+
title = title
|
| 65 |
+
texts = InputDoc(doc).get_paragraphs()
|
| 66 |
+
add_texts_to_collection(coll_name=collection_name, texts=texts, file=title, source='my_files')
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
"""
|
| 70 |
+
Generate Control
|
| 71 |
+
"""
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def generate_doc_from_gpt(input_txt: str) -> str:
|
| 75 |
+
input_doc = InputDoc(input_txt)
|
| 76 |
+
tasks = input_doc.tasks
|
| 77 |
+
task_resolutions = [get_public_paragraph(t) for t in tasks]
|
| 78 |
+
# task_resolutions = ["ça c'est de la réso"]
|
| 79 |
+
generated_doc = input_doc.replace_tasks(task_resolutions)
|
| 80 |
+
return generated_doc
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def generate_doc_from_db(input_txt: str, collection_name: str, from_files: list[str]) -> str:
    """Generate a document by resolving tasks against the private collection.

    For every task an LLM draft is built, matching chunks are pulled from
    *collection_name* (restricted to *from_files*) and a final paragraph is
    generated from those chunks.
    """
    input_doc = InputDoc(input_txt)
    tasks = input_doc.tasks
    # the LLM draft of each task doubles as the semantic-search query
    # (removed a trivial inner wrapper `query_from_task` that only forwarded the call)
    queries = [get_public_paragraph(t) for t in tasks]
    texts_list = [query_collection(coll_name=collection_name, query=q, from_files=from_files) for q in queries]
    task_resolutions = [get_private_paragraph(task=task, texts=texts) for task, texts in zip(tasks, texts_list)]
    return input_doc.replace_tasks(task_resolutions)
|
src/model/__pycache__/document.cpython-311.pyc
ADDED
|
Binary file (12.6 kB). View file
|
|
|
src/model/document.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Doc:
    """A text document parsed into typed lines, a Container tree and a task list."""

    def __init__(self, fulltext: str = '', title: str = '', params: dict = None):
        # None default instead of the original shared-mutable `params={}` pitfall
        self.params = {} if params is None else params
        self.lines = [Line(text.strip(), self.params) for text in fulltext.split("\n") if text.strip()]
        self.title, self.lines = self._get_title(title)
        self.container = Container(lines=self.lines, title=self.title, father=self, params=self.params)
        # one task dict per container that accumulated task lines
        self.tasks = [c.get_task(self.container.one_liner) for c in self.container.containers if c.task]
        self.fulltext = fulltext

    def _get_title(self, title):
        """For input texts, promote a leading 'title' line; otherwise keep *title*."""
        lines = self.lines
        if self.params['type'] == 'input_text':
            if self.lines and self.lines[0] and self.lines[0].type == 'title':
                title = self.lines[0].text
                lines = lines[1:]
            else:
                title = 'the title is missing'
        return title, lines

    def replace_tasks(self, resolutions: list) -> str:
        """Return fulltext with each task line replaced, in order, by a resolution.

        Bug fix: the original evaluated `next(iter(resolutions))` per line,
        re-creating the iterator each time, so EVERY task line received the
        first resolution. A single iterator now consumes resolutions in order.
        """
        starts = self.params['startswith_']
        reverts = {starts[k]: k for k in starts}
        task_starter = reverts['task']
        res_iter = iter(resolutions)
        lines = self.fulltext.split('\n')
        new_lines = [line if not line.startswith(task_starter) else next(res_iter) for line in lines]
        return "\n".join(new_lines)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class InputDoc(Doc):
    """Doc specialisation for user input text ('!!'/'++'/'??' plus '#' headings)."""

    def __init__(self, fulltext='', title=''):
        markers = {'!!': 'title', '++': 'comment', '??': 'task'}
        # markdown-style heading markers map to their hierarchy level
        markers.update({'# ': '1', '## ': '2', '### ': '3',
                        '####': '4', '#####': '5', '######': '6'})
        self.params = {'type': 'input_text', 'startswith_': markers}
        super().__init__(fulltext=fulltext, title=title, params=self.params)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class WikiPage(Doc):
    """Doc specialisation for wikipedia page text ('== ... ==' section markers)."""

    def __init__(self, fulltext='', title=''):
        self.params = {
            'type': 'wiki',
            'startswith_':
                {'== ': '1', '=== ': '2', '==== ': '3', '===== ': '4', '====== ': '5', '======= ': '6'},
            # bug fix: the last closer was ' ======' (6 '='), duplicating level 5;
            # it must be ' =======' (7 '=') to mirror the '======= ' level-6 opener
            'endswith_':
                [' ==', ' ===', ' ====', ' =====', ' ======', ' ======='],

            # boilerplate sections that must not be indexed
            'discarded': ["See also", "Notes", "References", "Sources", "External links", "Bibliography",
                          "Cinematic adaptations", "Further reading", "Maps"]
        }
        super().__init__(fulltext=fulltext, title=title, params=self.params)

    def get_paragraphs(self, chunk=500):
        """Return the page's paragraphs, splitting containers longer than *chunk*."""
        return self.container.get_paragraphs(chunk)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class Container:
    """A node of the document tree: owns its typed lines and child sections.

    Built from a flat list of Line objects; structure lines (headings) open
    child containers, all other lines are accumulated on the current node.
    NOTE(review): the mutable default arguments (lines=[], params={}) are
    shared across calls; they are only read here, never mutated, so behaviour
    is safe -- but they should eventually become None defaults.
    """

    def __init__(self, lines=[], level=0, title='', father=None, params={}):

        # raw Line objects and their concatenated text, per line type
        self.normals = []
        self.normal = ''
        self.comments = []
        self.comment = ''
        self.tasks = []
        self.task = ''
        self.children = []
        self.level = level
        self.title = title
        self.father = father

        # recursively distribute the line stream between self and new children
        self._expand(lines)

        # drop children whose title is black-listed (e.g. wiki "References")
        if params and 'discarded' in params.keys():
            self.children = [child for child in self.children if child.title not in params['discarded']]

        # flat depth-first list of this node and every descendant
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        # one_liner: short description; root_text: this node's own content;
        # text: own content plus all descendants' content
        self.one_liner = self.title + ' ' + self.comment
        self.root_text = self.one_liner + ' ' + self.normal
        self.text = self.root_text
        for child in self.children:
            self.text += ' ' + child.text

        self.summary = self.text

    def _expand(self, lines):
        """Distribute *lines* between this node and its (new) children.

        Lines seen before the first structure line belong to this node; each
        structure line at level+1 starts a new child, and deeper / non-structure
        lines are buffered to be expanded recursively by that child.
        """
        new_child = False          # True once the first structure line was seen
        new_child_lines = []       # buffered lines for the child being collected
        new_child_title = []       # NOTE(review): initialised as a list, becomes a str below
        for line in lines:
            if not new_child:
                # still accumulating this node's own content
                if line.type == 'normal':
                    self.normals.append(line)
                    self.normal += ' ' + line.text
                elif line.type == 'comment':
                    self.comments.append(line)
                    self.comment += ' ' + line.text
                elif line.type == 'task':
                    self.tasks.append(line)
                    self.task += ' ' + line.text
                elif line.is_structure:
                    # first heading: switch to child-collection mode
                    new_child = True
                    new_child_lines = []
                    new_child_title = line.text
                    line.level = self.level + 1
                    # provisional one_liner; __init__ recomputes it afterwards
                    self.one_liner = self.title + self.comment
            else:
                if self.level + 1 < line.level or not line.is_structure:
                    # belongs inside the child currently being collected
                    new_child_lines.append(line)
                elif self.level + 1 == line.level:
                    # sibling heading: close the current child, open the next one
                    self.children.append(Container(lines=new_child_lines,
                                                   level=self.level + 1,
                                                   title=new_child_title,
                                                   father=self))
                    new_child_lines = []
                    new_child_title = line.text
        if new_child:
            # flush the last collected child
            self.children.append(Container(lines=new_child_lines,
                                           level=self.level + 1,
                                           title=new_child_title,
                                           father=self))

    def get_task(self, doc_one_liner):
        """Build the task-context dict used to prompt the LLM.

        Includes the task text, this section's one-liner, the document and
        parent one-liners, and the one-liners of preceding/following siblings.
        """
        siblings_ = self.father.children.copy()
        index = siblings_.index(self)
        siblings_before_context = [sibling.one_liner for idx, sibling in enumerate(siblings_) if idx < index]
        siblings_after_context = [sibling.one_liner for idx, sibling in enumerate(siblings_) if index < idx]

        task = {'description': self.task,
                'about': self.one_liner,
                'doc_description': doc_one_liner,
                'above': self.father.one_liner,
                'before': siblings_before_context,
                'after': siblings_after_context}
        return task

    def get_paragraphs(self, chunk=500):
        """Return text paragraphs, recursing into children when text exceeds *chunk*."""
        if len(self.text) < chunk:
            paragraphs = [self.text]
        else:
            paragraphs = [self.root_text]
            for child in self.children:
                paragraphs += child.get_paragraphs(chunk)
        return paragraphs
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
class Line:
    """A single input line tagged with a semantic type and structural level."""

    def __init__(self, text, params):
        self.text = text
        self.type, self.text = self._parse_text(params)
        # heading types are stored as digit strings; anything else is non-structural
        self.level = int(self.type) if self.type.isdigit() else -1
        self.is_structure = 0 < self.level

    def _parse_text(self, params):
        """Match the line against the configured start/end markers.

        Returns (type, stripped_text); lines matching no marker are 'normal'.
        """
        def unwrap(raw, opener, closer):
            # keep the text after the opener (and before the closer, when given)
            raw = raw.split(opener)[1]
            if closer != "":
                raw = raw.split(closer)[0]
            return raw.strip()

        starters = params['startswith_']
        enders = params.get('endswith_', [""] * len(starters))

        # first matching starter (in declaration order) wins, as before
        for position, opener in enumerate(starters):
            if self.text.startswith(opener):
                return starters[opener], unwrap(self.text, opener, enders[position]).strip()
        return 'normal', self.text.strip()
|
src/model/model.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Container:
    """Recursive text container that splits wiki-style text into chunked docs.

    NOTE(review): this class references Container.title_separators,
    Container.title_headers, Container.separators and Container.discarded,
    none of which are defined in this file -- as written, expand() raises
    AttributeError as soon as those attributes are touched. It looks
    superseded by src/model/document.py; confirm before relying on it.
    """

    def __init__(self, title: str = '', fulltext: str = '', level: int = 0):

        self.title = title
        self.fulltext = fulltext
        self.children = []
        self.text = ''
        self.level = level
        self.docs = []
        self.expand()
        self.to_docs()

    def expand(self, max_length=700):
        """Split fulltext into title / text / children using level separators."""

        if 0 < self.level:
            # separate the section header from its body
            split_title = self.fulltext.split(Container.title_separators[self.level])
            if 1 < len(split_title):
                self.title += ('\n' + re.sub(Container.title_headers[self.level], '', split_title[0]))
                self.fulltext = split_title[1]
        # discard boilerplate sections entirely
        if self.title in Container.discarded:
            self.fulltext = self.text = ''
        if self.fulltext:
            if max_length < len(self.fulltext):
                # too long: split into children at the next-level separator
                split_text = self.fulltext.split(Container.separators[self.level])
                if self.fulltext[0] != '=':
                    # leading fragment before the first sub-heading stays on this node
                    self.text += self.title + '\n' + split_text[0]
                    split_text.pop(0)
                self.children = [Container(fulltext=t, level=self.level + 1, title=self.title) for t in split_text]
            else:
                self.text += '\n' + self.fulltext

    def to_docs(self):
        """Collect this node's text (if long enough) plus the children's docs."""
        self.docs = [self.text] if 60 < len(self.text) else []
        for child in self.children:
            # NOTE(review): children have no `root_text` attribute in this class;
            # this looks like it should be `child.docs` -- confirm intent.
            self.docs += child.root_text

    def group_docs(self, max_length=700):
        """Greedily merge consecutive docs while the merged size stays < max_length."""
        grouped_docs = []
        for doc in self.docs:
            if grouped_docs and len(grouped_docs[-1])+len(doc) < max_length:
                doc = grouped_docs.pop()+' '+doc
            grouped_docs.append(doc)
        return grouped_docs

    def __str__(self):
        # recursive debugging card: level, word count, truncated title/text, sizes
        card = "... level : " + str(self.level) + " words :" + str(len(self.text.split(' '))) + "\n"
        card += "... title : " + self.title[:100] + "\n"
        card += "... text : " + self.text[:100] + "\n"
        card += "... fulllength : " + str(len(self.fulltext)) + "\n"
        card += "... length : " + str(len(self.text)) + "\n\n"
        for child in self.children:
            card += child.__str__()
        return card

    def get_texts(self):
        """Public accessor: the grouped document chunks."""
        return self.group_docs()
|
src/tools/__pycache__/llm_tools.cpython-311.pyc
ADDED
|
Binary file (6.95 kB). View file
|
|
|
src/tools/__pycache__/llms.cpython-311.pyc
ADDED
|
Binary file (573 Bytes). View file
|
|
|
src/tools/__pycache__/semantic_db.cpython-311.pyc
ADDED
|
Binary file (4.5 kB). View file
|
|
|
src/tools/__pycache__/wiki.cpython-311.pyc
ADDED
|
Binary file (2.82 kB). View file
|
|
|
src/tools/llm_tools.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import wikipedia
|
| 2 |
+
import json
|
| 3 |
+
from langchain import PromptTemplate
|
| 4 |
+
from langchain.vectorstores import Chroma
|
| 5 |
+
from langchain.text_splitter import CharacterTextSplitter
|
| 6 |
+
|
| 7 |
+
from src.tools.llms import openai_llm
|
| 8 |
+
from src.tools.wiki import Wiki
|
| 9 |
+
from src.model.document import WikiPage
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_wikilist(task: dict) -> list[str]:
    """
    get the titles of wiki pages interesting for solving the given task

    The LLM is asked for a JSON list of titles; each suggested title is then
    expanded with up to `expand_factor` related titles via wikipedia search,
    and the deduplicated union is returned.
    (Fixed the return annotation -- a list is returned, not a str -- and
    removed an unused PromptTemplate and a dead commented-out LLMChain call.)
    """

    llm = openai_llm
    template = (f"\n"
                f" Your task consists in finding the list of wikipedia page titles which provide useful content "
                f" for a paragraph whose description is delimited by triple backticks: ```{task['description']}```\n"
                f" \n"
                f" The paragraph belongs at the top level of the hierarchy to a document"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" \n"
                f" Format your response as a JSON list of strings separated by commas.\n"
                f" \n"
                f"\n"
                f" ")

    # the completion must be a JSON array of page titles
    wikilist = json.loads(llm(template))

    # broaden each suggested title with related titles from wikipedia search
    expand_factor = 3
    expanded_wikilist = []
    for wikipage in wikilist:
        expanded_wikilist += wikipedia.search(wikipage, expand_factor)

    return list(set(expanded_wikilist))
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def get_public_paragraph(task: dict) -> str:
    """returns the task directly performed by chat GPT

    Builds a prompt from the task context (description, document description,
    parent paragraph, previous and next siblings) and returns the raw LLM
    completion -- no retrieval is involved.
    """

    llm = openai_llm
    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    # single direct completion call
    p = llm(template)

    return p
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def create_index(wikilist: list[str]):
    """Build a Chroma index from the given wiki page titles.

    Fixes: each title is now fetched exactly once (the original called
    fetch(title) twice per title -- two network round-trips); every fetched
    page is indexed (the original split only texts[0]); validation asserts
    (stripped under -O) were removed.
    """
    fetch = Wiki().fetch
    chunk = 800

    # fetch once per title; fetch() returns an error string on failure
    fetched = [(title, fetch(title)) for title in wikilist]
    pages = [(title, page) for title, page in fetched if not isinstance(page, str)]

    texts = [WikiPage(title=title, fulltext=page.page_content) for title, page in pages]

    doc_splitter = CharacterTextSplitter(
        separator=".",
        chunk_size=chunk,
        chunk_overlap=100,
        length_function=len,
    )

    # split every page's paragraphs, not just the first page's
    split_texts = []
    for wiki_doc in texts:
        for paragraph in wiki_doc.get_paragraphs(chunk=chunk):
            split_texts += doc_splitter.split_text(paragraph)

    wiki_index = Chroma.from_texts(split_texts)

    return wiki_index
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def get_wiki_paragraph(wiki_index, task: dict) -> str:
    """Generate a paragraph for *task* grounded on chunks retrieved from *wiki_index*.

    A public-LLM draft of the paragraph is used as the semantic query; the
    retrieved chunks are injected into the final generation prompt.
    """
    # NOTE(review): semantic_search is not defined or imported in this file -- confirm.
    task_description = get_public_paragraph(task)
    wiki_paragraphs = semantic_search(wiki_index, task_description)
    text_content = ""
    for p in wiki_paragraphs:
        # bug fix: was "/n/n" (literal slashes); chunks must be separated by blank lines
        text_content += p.page_content + "\n\n"

    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The text generation is based in the documents provided in these sections \n"
                f" delimited by by triple backticks: ``` {text_content}``` \n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    llm = openai_llm
    p = llm(template)

    return p
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def get_private_paragraph(texts, task: dict) -> str:
    """Generate a paragraph for *task* grounded on the caller-provided *texts*."""

    text_content = ""
    for t in texts:
        # bug fix: was "/n/n" (literal slashes); chunks must be separated by blank lines
        text_content += t + "\n\n"

    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The text generation is based in the documents provided in these sections \n"
                f" delimited by by triple backticks: ``` {text_content}``` \n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    llm = openai_llm
    p = llm(template)

    return p
|
src/tools/llms.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from langchain.llms import OpenAI

import os


# SECURITY: the original file committed live OpenAI / SerpAPI keys to source
# control. Keys are now read from the environment; set OPENAI_API_KEY (and
# optionally SERPAPI_API_KEY) before importing this module -- and revoke the
# previously committed keys.
OpenAI_KEY = os.environ.get("OPENAI_API_KEY", "")
os.environ["OPENAI_API_KEY"] = OpenAI_KEY
# temperature=0: deterministic completions for document generation
openai_llm = OpenAI(temperature=0)

SERPAPI_API_KEY = os.environ.get("SERPAPI_API_KEY", "")
os.environ["SERPAPI_API_KEY"] = SERPAPI_API_KEY
|
src/tools/semantic_db.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import chromadb
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
|
| 4 |
+
chroma_client = chromadb.Client()
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def get_or_create_collection(coll_name: str):
    """Fetch the collection named *coll_name*, creating it if absent.

    The first six characters of the name are stored as the collection's
    "date" metadata (used later by delete_old_collections).
    """
    return chroma_client.get_or_create_collection(
        name=coll_name, metadata={"date": coll_name[:6]}
    )
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_collection(coll_name: str):
    """Return the existing collection named *coll_name*."""
    return chroma_client.get_collection(name=coll_name)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def reset_collection(coll_name: str):
    """Empty the collection named *coll_name* and return it."""
    collection = chroma_client.get_collection(name=coll_name)
    collection.delete()
    return collection
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def delete_old_collections(old=2):
    """Delete collections whose "date" metadata (MMDDHH) is more than *old* hours old.

    Bug fix: the original compared MMDDHH stamps as integers with
    `current - old`, which breaks across day and month boundaries (e.g. at
    06-01 01h, collections from 05-31 23h were deleted as "old"). Stamps are
    now parsed into datetimes and compared with a real timedelta.
    NOTE(review): the stamp carries no year, so a year rollover still
    misorders Dec/Jan collections.
    """
    from datetime import timedelta  # local: the module imports only datetime

    now = datetime.now()
    cutoff = now - timedelta(hours=old)
    for coll in chroma_client.list_collections():
        # interpret the MMDDHH stamp in the current year
        stamp = datetime.strptime(f"{now.year}{coll.metadata['date']}", "%Y%m%d%H")
        if stamp < cutoff:
            chroma_client.delete_collection(coll.name)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def add_texts_to_collection(coll_name: str, texts: [str], file: str, source: str):
    """Store *texts* (all originating from the same *file*) in a collection.

    Ids are deterministic ("<file>-<index>"), so any previously stored chunks
    of the same file are deleted first and then re-inserted.
    """
    collection = chroma_client.get_collection(name=coll_name)
    ids = [f"{file}-{i}" for i, _ in enumerate(texts)]
    metadatas = [{file: 1, 'source': source} for _ in texts]
    collection.delete(ids=ids)
    collection.add(documents=texts, metadatas=metadatas, ids=ids)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def delete_collection(coll_name: str):
    """Drop the collection named *coll_name* from the chroma client."""
    chroma_client.delete_collection(name=coll_name)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def list_collections():
    """Return every collection known to the chroma client."""
    return chroma_client.list_collections()
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def query_collection(coll_name: str, query: str, from_files: list[str], n_results: int = 4):
    """Semantic query over a collection, restricted to chunks from *from_files*.

    Fix: input validation was an `assert` (silently stripped under -O); it is
    now an explicit ValueError.
    """
    if not from_files:
        raise ValueError("from_files must contain at least one file name")
    coll = chroma_client.get_collection(name=coll_name)
    # each stored chunk carries a {file_name: 1} metadata flag; OR them together
    where_ = [{file: 1} for file in from_files]
    where_ = where_[0] if len(where_) == 1 else {'$or': where_}
    # never ask for more results than the collection holds
    n_results_ = min(n_results, coll.count())
    return coll.query(query_texts=query, n_results=n_results_, where=where_)
|
src/tools/wiki.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Union
|
| 2 |
+
|
| 3 |
+
from langchain.docstore.base import Docstore
|
| 4 |
+
from langchain.docstore.document import Document
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Wiki(Docstore):
    """
    Wrapper around wikipedia API.
    """

    def __init__(self) -> None:
        """Check that wikipedia package is installed."""
        try:
            import wikipedia  # noqa: F401
        except ImportError:
            raise ValueError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            )

    @staticmethod
    def fetch(searched_page: str) -> Union[str, Document]:
        """
        Try to fetch for wiki page.

        If page exists, return a Document with its content and url.
        If page does not exist (or is ambiguous), return an error string
        listing similar titles.
        """
        import wikipedia

        try:
            # wikipedia.set_lang("fr")
            # fetch the page ONCE; the original called wikipedia.page() twice
            # (for .content and .url), doubling the network round-trips
            page = wikipedia.page(searched_page)
            result: Union[str, Document] = Document(
                page_content=page.content, metadata={"page": page.url}
            )
        except (wikipedia.PageError, wikipedia.DisambiguationError):
            # both failure modes had identical bodies; merged into one handler
            result = f"Could not find [{searched_page}]. Similar: {wikipedia.search(searched_page)}"
        return result

    @staticmethod
    def search(searched_context: str) -> list[str]:
        """
        Finds wiki page title in relation with the given context

        Fix: the original definition had no `self` and no @staticmethod, so
        calling it on a Wiki instance raised TypeError.
        """
        import wikipedia

        try:
            # wikipedia.set_lang("fr")
            result = wikipedia.search(searched_context)
        except wikipedia.PageError:
            result = f"Could not find [{searched_context}]."
        return result
|
| 60 |
+
|
| 61 |
+
|
tests/.chroma/index/id_to_uuid_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f74f5b108819cf0f64b68349537a233944b0a46f682c367c23f8ce581ed3cea8
|
| 3 |
+
size 444
|
tests/.chroma/index/id_to_uuid_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:445b3f8d8a6b5cc676c01a5d1b4b67946a920fcce477e27e5b48cbdfeca61755
|
| 3 |
+
size 446
|
tests/.chroma/index/id_to_uuid_37825327-eef6-4255-92ac-787c21197d77.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5ee30e4022243d2b6b479c491ee60b3f5e7c539e3a362328642405f73cf8fee
|
| 3 |
+
size 695
|
tests/.chroma/index/id_to_uuid_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f3997be16750f5f7b3978674fea2364fa6ab97c2dc426acb80eb1d422511443
|
| 3 |
+
size 98
|
tests/.chroma/index/id_to_uuid_46204504-325f-47e6-9176-e2054080ad57.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfaa528132e4e0652c83416114d1454c9b1d0b6e0fb1a8ef20b9734edb35323b
|
| 3 |
+
size 287
|
tests/.chroma/index/id_to_uuid_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f833dce2979b320eaec2bb9154f769efb4c634e230149487dd7f221ecc25606
|
| 3 |
+
size 447
|
tests/.chroma/index/id_to_uuid_69550299-be81-45fa-8bbf-3d83be2d7991.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22ba07768895e87dbf09941729a79568b660241f036aa4d8121353c2facf4bba
|
| 3 |
+
size 288
|
tests/.chroma/index/id_to_uuid_78f80853-f999-4f5e-b320-41c98bd28592.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c056689ec182e73b665df5dcf2e863da9c5e3445fe588071cb3cd689d318df46
|
| 3 |
+
size 286
|
tests/.chroma/index/id_to_uuid_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91659f10e125e40759e9951d8ab50e05d48fe8b13eb602ba18ebf12c2f7ef216
|
| 3 |
+
size 97
|
tests/.chroma/index/id_to_uuid_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbd8492d26db8287ca812566c52964275721c15d0c06fbc160aa130e70433873
|
| 3 |
+
size 286
|
tests/.chroma/index/id_to_uuid_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c62d35faab2d47684f6cdefe34f1ac5a45a300aee5eb85e3897c1a2651943675
|
| 3 |
+
size 97
|
tests/.chroma/index/id_to_uuid_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94e55e34d33c3c27775e0b6ccffd32c0f845ee8106fe96f667ba0804221d4b0d
|
| 3 |
+
size 98
|
tests/.chroma/index/id_to_uuid_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:138806738237bc2498973bd85fe7493093ed4eaea0fa61bdcf22c15aeaf88da9
|
| 3 |
+
size 441
|
tests/.chroma/index/id_to_uuid_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5eb17c0f92ddc34e926384623c01a49f5fb2fe87a4605235402491faa8140af6
|
| 3 |
+
size 97
|
tests/.chroma/index/id_to_uuid_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45ff2265b5042692a7436434dc4ec86fda1045b264ef22325a633d7c310479b1
|
| 3 |
+
size 441
|
tests/.chroma/index/id_to_uuid_bf57b36f-a918-4484-b897-79f751d5cad4.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfce13fc2f13ad950016bb48ccbbbd04ce13be07c748476dde1f940fcdfa0d52
|
| 3 |
+
size 5
|
tests/.chroma/index/id_to_uuid_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:276ff93bdb0f54dcda2d4bd238698fdf417d26879c578acb0520865cc1079631
|
| 3 |
+
size 444
|
tests/.chroma/index/id_to_uuid_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfce13fc2f13ad950016bb48ccbbbd04ce13be07c748476dde1f940fcdfa0d52
|
| 3 |
+
size 5
|
tests/.chroma/index/index_0c55a091-9f95-4a8d-b868-83d95412fdc4.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d
|
| 3 |
+
size 35444
|
tests/.chroma/index/index_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d
|
| 3 |
+
size 35444
|
tests/.chroma/index/index_37825327-eef6-4255-92ac-787c21197d77.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcd0348e194c5b4661616aab5d4f4d1847f2a0bd5d5295b6230075a7bfb000ad
|
| 3 |
+
size 35444
|
tests/.chroma/index/index_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c915470fb0aee4ad80d1313da2ba6caae6ee813ffb23c4a3a4d73dd610e492c2
|
| 3 |
+
size 3456
|
tests/.chroma/index/index_46204504-325f-47e6-9176-e2054080ad57.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3b7bcd06e5d6cc6c260fe5da47466cfc406a9e294063c5033210b435e5e69b6
|
| 3 |
+
size 13604
|
tests/.chroma/index/index_64afc7c0-c153-47d1-af52-55e1738ae76c.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d
|
| 3 |
+
size 35444
|
tests/.chroma/index/index_69550299-be81-45fa-8bbf-3d83be2d7991.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3b7bcd06e5d6cc6c260fe5da47466cfc406a9e294063c5033210b435e5e69b6
|
| 3 |
+
size 13604
|
tests/.chroma/index/index_78f80853-f999-4f5e-b320-41c98bd28592.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3b7bcd06e5d6cc6c260fe5da47466cfc406a9e294063c5033210b435e5e69b6
|
| 3 |
+
size 13604
|
tests/.chroma/index/index_90d6076c-bb50-40ed-90a1-2df2243fd12e.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c915470fb0aee4ad80d1313da2ba6caae6ee813ffb23c4a3a4d73dd610e492c2
|
| 3 |
+
size 3456
|
tests/.chroma/index/index_a10bf13e-424a-41cd-bcfb-27d8072711ea.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3b7bcd06e5d6cc6c260fe5da47466cfc406a9e294063c5033210b435e5e69b6
|
| 3 |
+
size 13604
|