diff --git a/81/generated_text.txt b/81/generated_text.txt new file mode 100644 index 0000000000000000000000000000000000000000..10cf5ac8c0450aeaf8d099c7da18d7f19e15c860 --- /dev/null +++ b/81/generated_text.txt @@ -0,0 +1,7 @@ +!! terrorism in Italy in the years 70 and 80 + +# what happened + +## the killing of Aldo Moro + Aldo Moro was an Italian statesman and a prominent member of the Christian Democracy party. He was kidnapped and killed by the Red Brigades, a left-wing terrorist group, in 1978. The Red Brigades wanted to destabilize the Italian government and force the release of their imprisoned members. They believed that Moro was the key to achieving their goals, and so they targeted him for assassination. Moro's death was a major blow to the Italian government and to the Christian Democracy party, and it marked the beginning of a period of increased terrorism in Italy. The Red Brigades were eventually disbanded, but the legacy of Moro's death still lingers in Italy today. + diff --git a/README.md b/README.md index 20f428637926b11ebcbe819e18ca3b03c3e84a97..fe71889f2a2b25e00a18fe05b92a74e3a1db4b9b 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ --- -title: GPTdoc -emoji: 🌍 -colorFrom: purple -colorTo: gray +title: Gendoc +emoji: 🦀 +colorFrom: indigo +colorTo: blue sdk: gradio sdk_version: 3.32.0 app_file: app.py diff --git a/__pycache__/test_input_file.cpython-311-pytest-7.3.1.pyc b/__pycache__/test_input_file.cpython-311-pytest-7.3.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30768440c0889c7c0911cd6c3f7c49100b4b7886 Binary files /dev/null and b/__pycache__/test_input_file.cpython-311-pytest-7.3.1.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..32145d28bea5d71939e4455ae6c62ee66463fee9 --- /dev/null +++ b/app.py @@ -0,0 +1,374 @@ +import os.path +import gradio as gr +import numpy as np +import asyncio +import shutil + +import src.control.control as control + + 
+""" +================================== +A. Component part +================================== +""" + +with gr.Blocks() as docgpt: + with gr.Row(): + + with gr.Column(): + pass + + with gr.Column(scale=10): + """ + 1. input docs components + """ + + gr.Markdown("# 1. Define the plan of your document") + + f = open('data/usage.txt', 'r') + usage = f.read() + + input_text = gr.Textbox( + label="enter your text", + lines=25, + max_lines=25, + interactive=True, + elem_classes="selected_", + placeholder=usage, + ) + + with gr.Row(): + upload_btn = gr.UploadButton(type='file') + example1 = gr.Button("One task example") + example2 = gr.Button("Several tasks example") + + """ + 2. source components + """ + + gr.Markdown("# 2. Choose the sources for the document generation") + + with gr.Column(visible=True, variant='panel') as select_col: + gr.Markdown("### Select the sources") + source_radio = gr.Radio( + choices=["Unknown sources", "My own sources"], + label="", + value="Unknown sources", + visible=True, + ) + + with gr.Column(visible=False, variant='panel') as db_col: + gr.Markdown("### My sources") + db_list_comp = gr.CheckboxGroup( + label="Current content", + info="These documents are currently your sources. 
Unselect the documents you don't want to be taken" + "into account when generating the document", + visible=True, + interactive=True, + ) + with gr.Row(): + db_reset_btn = gr.Button("Reset the sources", visible=False).style(full_width=False, size="sm") + db_add_doc_btn = gr.Button("Add new documents", visible=True).style(full_width=False, size="sm") + + with gr.Column(visible=False, variant="panel") as add_col: + gr.Markdown("### Add new documents ") + + with gr.Tab("From Wikipedia"): + wiki_fetch_btn = gr.Button("Search for Wikipedia pages", visible=True) + wiki_fetch_btn.style(full_width=False, size="sm") + wiki_list_comp = gr.CheckboxGroup( + label="Select the wiki pages", + info="The selected pages can be added to sources", + visible=False, + interactive=True, + ) + + wiki_add_to_db_btn = gr.Button("Add selection to sources", visible=False) + wiki_add_to_db_btn.style(full_width=False, size="sm") + + with gr.Tab("From disk"): + my_files_list_comp = gr.Files( + label="Upload own documents", + info="Your selected documents provide the content for generating the output document", + visible=True, + ) + my_files_add_to_db_btn = gr.Button("Add files to sources", visible=False) + my_files_add_to_db_btn.style(full_width=False, size="sm") + + add_close_btn = gr.Button("Close").style(size='sm', full_width=False) + + """ + 3. Generate (and inspect the document) + """ + + gr.Markdown("# 3. Generate the document") + + generate_btn = gr.Button("Generate", interactive=True) + + output_text = gr.Textbox( + label="Generated document", + value="", + lines=25, + max_lines=25, + interactive=False, + ) + + generated_file = gr.File( + interactive=False, + visible=False, + ) + + with gr.Column(): + pass + + """ + ================================== + B. 
def upload_example_file(btn, input_id_):
    """
    copies one of the bundled example plans into the session folder and loads it
    into the input textbox

    :param btn: label of the clicked button; "One task example" selects
        onetask_example.txt, any other label selects long_example.txt
    :param input_id_: current session id, or -1 when none has been assigned yet
    :return: update dict filling the input textbox and storing the session id
    """
    filename = "onetask_example.txt" if btn == "One task example" else "long_example.txt"
    long_id = control.get_long_id(input_id_)
    session_dir = 'tmp_input/' + long_id
    # makedirs + exist_ok: the session id is reused on every later click, and the
    # parent 'tmp_input' folder may not exist yet; os.mkdir failed in both cases
    os.makedirs(session_dir, exist_ok=True)
    copypath = session_dir + '/' + filename
    shutil.copy("data/" + filename, copypath)
    update_ = upload_file(copypath)
    update_[input_id] = gr.update(value=long_id)
    return update_
def wiki_fetch_fn(wiki_db_files_, input_text_):
    """
    asks the control layer for the wiki pages relevant to the tasks of the input
    doc and offers the ones which are not yet in the source db for selection
    """
    candidates = control.wiki_fetch(input_text_)
    not_stored_yet = []
    for page_title in candidates:
        if page_title in wiki_db_files_:
            continue
        not_stored_yet.append(page_title)
    return {
        wiki_list_comp: gr.update(visible=True, value=not_stored_yet, choices=not_stored_yet),
        wiki_add_to_db_btn: gr.update(visible=True),
        # the full candidate list is kept so that a db reset can offer every page again
        wiki_source_var: candidates,
    }
async def my_files_add_to_db_fn(my_files_list_, my_files_db_, db_list_):
    """
    adds the uploaded files to the db source and refreshes the related components

    :param my_files_list_: uploaded file objects (each exposes its path as .name);
        may be None when the upload component is empty
    :param my_files_db_: base names of the files already stored (extended in place)
    :param db_list_: names currently listed as db content (extended in place)
    :return: update dict for the upload widgets, the db list and the buttons
    """
    # my_files_db_ stores base names, so the duplicate check has to compare base
    # names as well; comparing the full upload path never matched
    paths_to_add = [fi.name for fi in (my_files_list_ or [])
                    if os.path.basename(fi.name) not in my_files_db_]
    # NOTE(review): control.my_files_upload_and_store is declared with
    # (title, collection_name) but no collection name is available here;
    # the binding would have to pass db_collection_var as well - confirm and fix
    tasks = [control.my_files_upload_and_store(f_name) for f_name in paths_to_add]
    await asyncio.gather(*tasks)
    names_added = [os.path.basename(f_name) for f_name in paths_to_add]
    my_files_db_ += names_added
    db_list_ += names_added
    update_ = {
        my_files_list_comp: gr.update(value=None),
        my_files_add_to_db_btn: gr.update(visible=False),
        my_files_db_var: gr.update(value=my_files_db_),
        generate_btn: gr.update(interactive=True),
        db_reset_btn: gr.update(visible=True),
        db_list_comp: gr.update(
            visible=True,
            value=db_list_,
            choices=db_list_,
            label="Database content"),
    }
    return update_
def generate_fn(input_text_, source_, db_collection_, db_list_):
    """
    generates the final text starting from the input text and the source : either "public" or private = from
    documents stored in the collection in the db

    :param input_text_: the plan of the document as typed in the input textbox
    :param source_: value of the source radio; "Unknown sources" selects the public generation
    :param db_collection_: name of the collection holding the private sources
    :param db_list_: names of the source documents to take into account
    :return: update dict with the generated text and the path of the downloadable file
    """
    import tempfile  # local import: only needed to create the output folder

    if source_ == "Unknown sources":
        output_text_ = control.generate_doc_from_gpt(input_text_)
    else:
        output_text_ = control.generate_doc_from_db(input_txt=input_text_,
                                                    collection_name=db_collection_,
                                                    from_files=db_list_)

    # a unique folder per generation: the former random number in [0, 1000) collided
    # and made os.mkdir fail; it is created after the generation so that a failed
    # generation leaves no empty folder behind
    out_dir = tempfile.mkdtemp(prefix="generated_", dir=".")
    fpath = os.path.join(out_dir, "generated_text.txt")
    # the file must be closed (hence flushed) before its path is handed to the UI
    with open(fpath, "w", encoding="utf-8") as f_:
        f_.write(output_text_)

    update_ = {
        output_text: gr.update(value=output_text_),
        generated_file: gr.update(visible=True, value=fpath),
    }
    return update_
/dev/null +++ b/data/list @@ -0,0 +1,20 @@ +[ +"Years of Lead (Italy)", +"Terrorism in Italy", +"Red Brigades", +"Ordine Nuovo", +"Years of Lead (Italy)", +"Cold War", +"Terrorism in Europe", +"Palestinian terrorism" +] + +prompt = f""" +Your task is to identify the title of relevant wikipedia pages which would be helpful \ +to expand on this text. + +Give the page titles in the form of a JSON list, the text is delimited by triple \ +backticks. + +Text: ```{text}``` +""" \ No newline at end of file diff --git a/data/long_example.txt b/data/long_example.txt new file mode 100644 index 0000000000000000000000000000000000000000..82f34b2811b336f4dc54f71746cfe71d607df69c --- /dev/null +++ b/data/long_example.txt @@ -0,0 +1,21 @@ +!! terrorism in Italy in the years 70 and 80 + +# what happened +++ describe the facts in Italy +It is a fact , that Italy undergone several acts of terrorism in the 70s ans 80s + +## summary of all events linked to terrorism +?? summarize terrorism events from 70 to 90 in Italy (around 100 words) + +## the major events +?? identify several events and describe no more than 5 events (around 50 words per event) +## the major organisations +?? identify major organisations (political parties, terrorists groups, etc.) and key individuals +# the global context +++ give some context outside of Italy +## a specific period during the cold war between USSR and the USA +?? describe the specificities of the relationship between the US and USSR (around 100 words) +## the terrorism in the 70-80s in Europe +?? identify terrorism facts in the rest of Europe (e.g. Germany, France, Belgium) (around 50 words per fact) +## Palestinian terrorism in the 70s 80s +?? 
give some infos on Palestinian terrorism facts: acts, organisation and key individuals (around 100 words) \ No newline at end of file diff --git a/data/onetask_example.txt b/data/onetask_example.txt new file mode 100644 index 0000000000000000000000000000000000000000..c896f7eff51466651180349fdfd968234b0def6d --- /dev/null +++ b/data/onetask_example.txt @@ -0,0 +1,7 @@ +!! terrorism in Italy in the years 70 and 80 + +# what happened + +## the killing of Aldo Moro +?? who killed Aldo Moro and why? (around 100 words) + diff --git a/data/usage.txt b/data/usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..6f1ecd9c484d7d54a119f616595d95747382f305 --- /dev/null +++ b/data/usage.txt @@ -0,0 +1,13 @@ +!! Title + +# Heading level 1 +## Heading level 2 +### Heading level 3 +#### and so on ... + +?? Description of the paragraph to be generated + +++ Comment: adds additional context for the text generator + +normal text: it is taken into account by the text generator but remains as is in the generated document + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a034a6a8d34cdea270b9ea7883f0974c18fd1751 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,111 @@ +aiofiles==23.1.0 +aiohttp==3.8.4 +aiosignal==1.3.1 +altair==4.2.2 +anyio==3.6.2 +async-timeout==4.0.2 +attrs==23.1.0 +backoff==2.2.1 +beautifulsoup4==4.12.2 +cachetools==5.3.0 +certifi==2022.12.7 +charset-normalizer==3.1.0 +chromadb==0.3.21 +click==8.1.3 +clickhouse-connect==0.5.20 +contourpy==1.0.7 +cycler==0.11.0 +dataclasses-json==0.5.7 +duckdb==0.7.1 +entrypoints==0.4 +fastapi==0.95.1 +ffmpy==0.3.0 +filelock==3.11.0 +fonttools==4.39.3 +frozenlist==1.3.3 +fsspec==2023.4.0 +google-search-results==2.4.2 +gptcache==0.1.12 +gradio==3.27.0 +gradio_client==0.1.3 +h11==0.14.0 +hnswlib==0.7.0 +httpcore==0.17.0 +httptools==0.5.0 +httpx==0.24.0 +huggingface-hub==0.13.4 +idna==3.4 +iniconfig==2.0.0 +Jinja2==3.1.2 +joblib==1.2.0 
+jsonschema==4.17.3 +kiwisolver==1.4.4 +langchain==0.0.141 +linkify-it-py==2.0.0 +lz4==4.3.2 +markdown-it-py==2.2.0 +MarkupSafe==2.1.2 +marshmallow==3.19.0 +marshmallow-enum==1.5.1 +matplotlib==3.7.1 +mdit-py-plugins==0.3.3 +mdurl==0.1.2 +monotonic==1.6 +mpmath==1.3.0 +multidict==6.0.4 +mypy-extensions==1.0.0 +networkx==3.1 +nltk==3.8.1 +numpy==1.24.2 +openai==0.27.4 +openapi-schema-pydantic==1.2.4 +orjson==3.8.10 +packaging==23.1 +pandas==2.0.0 +Pillow==9.5.0 +pluggy==1.0.0 +posthog==3.0.0 +pydantic==1.10.7 +pydub==0.25.1 +pyparsing==3.0.9 +pyrsistent==0.19.3 +pytest==7.3.1 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-multipart==0.0.6 +pytz==2023.3 +PyYAML==6.0 +regex==2023.3.23 +requests==2.28.2 +scikit-learn==1.2.2 +scipy==1.10.1 +semantic-version==2.10.0 +sentence-transformers==2.2.2 +sentencepiece==0.1.98 +six==1.16.0 +sniffio==1.3.0 +soupsieve==2.4.1 +SQLAlchemy==1.4.47 +starlette==0.26.1 +sympy==1.11.1 +tenacity==8.2.2 +threadpoolctl==3.1.0 +tokenizers==0.13.3 +toolz==0.12.0 +torch==2.0.0 +torchvision==0.15.1 +tqdm==4.65.0 +transformers==4.28.1 +typing-inspect==0.8.0 +typing_extensions==4.5.0 +tzdata==2023.3 +uc-micro-py==1.0.1 +urllib3==1.26.15 +uvicorn==0.21.1 +uvloop==0.17.0 +watchfiles==0.19.0 +websockets==11.0.2 +wget==3.2 +wikipedia==1.4.0 +yarl==1.8.2 +zstandard==0.21.0 diff --git a/src/control/__pycache__/control.cpython-311.pyc b/src/control/__pycache__/control.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3170117c4fa7702b93a1f2cec73c8dc7e63c7a69 Binary files /dev/null and b/src/control/__pycache__/control.cpython-311.pyc differ diff --git a/src/control/control.py b/src/control/control.py new file mode 100644 index 0000000000000000000000000000000000000000..6a1a5a98a053e8419f78803a00c966a14b145497 --- /dev/null +++ b/src/control/control.py @@ -0,0 +1,93 @@ + +import asyncio # on va en avoir besoin :) +import string +import random +from datetime import datetime + + +from src.tools.semantic_db import 
def get_long_id(id_):
    """
    returns id_ untouched unless it is the -1 placeholder; in that case a new id
    is built as <month day hour minute>-<10 random lowercase letters or digits>
    """
    if id_ == -1:
        timestamp = datetime.now().strftime("%m%d%H%M")
        alphabet = string.ascii_lowercase + string.digits
        suffix = []
        for _ in range(10):
            suffix.append(random.choice(alphabet))
        return timestamp + '-' + ''.join(suffix)
    return id_
generate_doc_from_db(input_txt: str, collection_name: str, from_files: [str]) -> str: + + def query_from_task(task): + return get_public_paragraph(task) + input_doc = InputDoc(input_txt) + tasks = input_doc.tasks + queries = [query_from_task(t) for t in tasks] + texts_list = [query_collection(coll_name=collection_name, query=q, from_files=from_files) for q in queries] + task_resolutions = [get_private_paragraph(task=task, texts=texts) for task, texts in zip(tasks, texts_list)] + generated_doc = input_doc.replace_tasks(task_resolutions) + return generated_doc diff --git a/src/model/__pycache__/document.cpython-311.pyc b/src/model/__pycache__/document.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7600a17061f543b0c392bc175ae8158b0813fca8 Binary files /dev/null and b/src/model/__pycache__/document.cpython-311.pyc differ diff --git a/src/model/document.py b/src/model/document.py new file mode 100644 index 0000000000000000000000000000000000000000..4c767a608c07f10eabef40e877da71a532a46339 --- /dev/null +++ b/src/model/document.py @@ -0,0 +1,176 @@ +class Doc: + def __init__(self, fulltext: str = '', title: str = '', params: dict = {}): + self.params = params + self.lines = [Line(text.strip(), self.params) for text in fulltext.split("\n") if text.strip()] + self.title, self.lines = self._get_title(title) + self.container = Container(lines=self.lines, title=self.title, father=self, params=params) + self.tasks = [c.get_task(self.container.one_liner) for c in self.container.containers if c.task] + self.fulltext = fulltext + + def _get_title(self, title): + lines = self.lines + if self.params['type'] == 'input_text': + if self.lines and self.lines[0] and self.lines[0].type == 'title': + title = self.lines[0].text + lines = lines[1:] + else: + title = 'the title is missing' + return title, lines + + def replace_tasks(self, resolutions: [str]): + starts = self.params['startswith_'] + reverts = {starts[k]: k for k in starts} + task_starter = 
reverts['task'] + lines = self.fulltext.split('\n') + new_lines = [line if not line.startswith(task_starter) else next(iter(resolutions)) for line in lines] + new_fulltext = "\n".join(new_lines) + return new_fulltext + + +class InputDoc(Doc): + + def __init__(self, fulltext='', title=''): + self.params = { + 'type': 'input_text', + 'startswith_': + {'!!': 'title', '++': 'comment', '??': 'task', + '# ': '1', '## ': '2', '### ': '3', '####': '4', '#####': '5', '######': '6'} + } + super().__init__(fulltext=fulltext, title=title, params=self.params) + + +class WikiPage(Doc): + + def __init__(self, fulltext='', title=''): + self.params = { + 'type': 'wiki', + 'startswith_': + {'== ': '1', '=== ': '2', '==== ': '3', '===== ': '4', '====== ': '5', '======= ': '6'}, + 'endswith_': + [' ==', ' ===', ' ====', ' =====', ' ======', ' ======'], + + 'discarded': ["See also", "Notes", "References", "Sources", "External links", "Bibliography", + "Cinematic adaptations", "Further reading", "Maps"] + } + super().__init__(fulltext=fulltext, title=title, params=self.params) + + def get_paragraphs(self, chunk=500): + return self.container.get_paragraphs(chunk) + + +class Container: + + def __init__(self, lines=[], level=0, title='', father=None, params={}): + + self.normals = [] + self.normal = '' + self.comments = [] + self.comment = '' + self.tasks = [] + self.task = '' + self.children = [] + self.level = level + self.title = title + self.father = father + + self._expand(lines) + + if params and 'discarded' in params.keys(): + self.children = [child for child in self.children if child.title not in params['discarded']] + + self.containers = [self] + for child in self.children: + self.containers += child.containers + self.one_liner = self.title + ' ' + self.comment + self.root_text = self.one_liner + ' ' + self.normal + self.text = self.root_text + for child in self.children: + self.text += ' ' + child.text + + self.summary = self.text + + def _expand(self, lines): + new_child = False 
class Line:
    """
    One line of a document: its type ('normal' or a value of params['startswith_']),
    its text without the markers, and its heading level (-1 when not a heading).
    """

    def __init__(self, text, params):
        """
        :param text: the raw line
        :param params: parsing parameters; 'startswith_' (prefix -> type) is required,
            'endswith_' (closing markers, in the same order as the prefixes) is optional
        """
        self.text = text
        self.type, self.text = self._parse_text(params)
        # heading types are digit strings ('1', '2', ...), the other types are words
        self.level = int(self.type) if self.type.isdigit() else -1
        self.is_structure = 0 < self.level

    def _parse_text(self, params):
        """
        returns (type, text) where text has lost its opening and closing markers
        """
        startswith_ = params['startswith_']
        endswith_ = params.get('endswith_') or [""] * len(startswith_)

        # the longest matching prefix wins: '#####' also starts with '####', and taking
        # the first match turned a level 5 heading into a level 4 one named '# ...'
        match = None
        for i, starter in enumerate(startswith_):
            if self.text.startswith(starter) and (match is None or len(match[0]) < len(starter)):
                match = (starter, i)
        if match is None:
            return 'normal', self.text.strip()

        starter, i = match
        # the prefix is sliced off: splitting on it dropped everything after a second occurrence
        text = self.text[len(starter):]
        end = endswith_[i] if i < len(endswith_) else ""
        if end != "":
            text = text.split(end)[0]
        return startswith_[starter], text.strip()
self.docs: + if grouped_docs and len(grouped_docs[-1])+len(doc) < max_length: + doc = grouped_docs.pop()+' '+doc + grouped_docs.append(doc) + return grouped_docs + + def __str__(self): + card = "... level : " + str(self.level) + " words :" + str(len(self.text.split(' '))) + "\n" + card += "... title : " + self.title[:100] + "\n" + card += "... text : " + self.text[:100] + "\n" + card += "... fulllength : " + str(len(self.fulltext)) + "\n" + card += "... length : " + str(len(self.text)) + "\n\n" + for child in self.children: + card += child.__str__() + return card + + def get_texts(self): + return self.group_docs() diff --git a/src/tools/__pycache__/llm_tools.cpython-311.pyc b/src/tools/__pycache__/llm_tools.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbfc3cf1e6cfed908a3293841e00eb0fcf3d5f54 Binary files /dev/null and b/src/tools/__pycache__/llm_tools.cpython-311.pyc differ diff --git a/src/tools/__pycache__/llms.cpython-311.pyc b/src/tools/__pycache__/llms.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f28bcfb917074f254ff26736c24d31947a1c58ab Binary files /dev/null and b/src/tools/__pycache__/llms.cpython-311.pyc differ diff --git a/src/tools/__pycache__/semantic_db.cpython-311.pyc b/src/tools/__pycache__/semantic_db.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f10100efc37a1ea237054ed631d8d72f70f9f52 Binary files /dev/null and b/src/tools/__pycache__/semantic_db.cpython-311.pyc differ diff --git a/src/tools/__pycache__/wiki.cpython-311.pyc b/src/tools/__pycache__/wiki.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca0b6aea3a822edce6ceccef636dfc6177d86bbf Binary files /dev/null and b/src/tools/__pycache__/wiki.cpython-311.pyc differ diff --git a/src/tools/llm_tools.py b/src/tools/llm_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..3a58de494ab4527676909dff4ccfe4cbe0adbc21 --- 
/dev/null +++ b/src/tools/llm_tools.py @@ -0,0 +1,207 @@ +import wikipedia +import json +from langchain import PromptTemplate +from langchain.vectorstores import Chroma +from langchain.text_splitter import CharacterTextSplitter + +from src.tools.llms import openai_llm +from src.tools.wiki import Wiki +from src.model.document import WikiPage + + + +def get_wikilist(task: {}) -> str: + """ + get the titles of wiki pages interesting for solving the given task + """ + + llm = openai_llm + template = (f"\n" + f" Your task consists in finding the list of wikipedia page titles which provide useful content " + f" for a paragraph whose description is delimited by triple backticks: ```{task['description']}```\n" + f" \n" + f" The paragraph belongs at the top level of the hierarchy to a document" + f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n" + f" Make sure that the paragraph relates the top level of the document\n" + f" \n" + f" The paragraph belongs to a higher paragraph in the hierarchy \\n" + f" whose description is delimited by triple backticks: ``` {task['above']}```\n" + f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n" + f" \n" + f" The paragraphs comes after previous paragraphs \\n" + f" whose description is delimited by triple backticks: ``` {task['before']}```\n" + f" Make sure that the paragraph relates with previous paragraph without any repetition\n" + f" \n" + f" The paragraphs comes before next paragraphs \\n" + f" whose description is delimited by triple backticks: ``` {task['after']}```\n" + f" \n" + f" Format your response as a JSON list of strings separated by commas.\n" + f" \n" + f"\n" + f" ") + + prompt = PromptTemplate( + input_variables=[], + template=template + ) + + #wikilist = LLMChain(llm=openai_llm, prompt=prompt).run() + wikilist = json.loads(llm(template)) + + expanded_wikilist = [] + + expand_factor = 3 + + for wikipage in wikilist: + expanded_wikilist 
+= wikipedia.search(wikipage, expand_factor) + + wikilist = list(set(expanded_wikilist)) + + return wikilist + + +def get_public_paragraph(task: {}) -> str: + """returns the task directly performed by chat GPT""" + + llm = openai_llm + template = (f"\n" + f" Your task consists in generating a paragraph\\n" + f" whose description is delimited by triple backticks: ```{task['description']}```\n" + f"\n" + f" The paragraph belongs at the top level of the hierarchy to a document \\n" + f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n" + f" Make sure that the paragraph relates the top level of the document\n" + f" \n" + f" The paragraph belongs to a higher paragraph in the hierarchy \\n" + f" whose description is delimited by triple backticks: ``` {task['above']}```\n" + f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n" + f" \n" + f" The paragraphs comes after previous paragraphs \\n" + f" whose description is delimited by triple backticks: ``` {task['before']}```\n" + f" Make sure that the paragraph relates with previous paragraph without any repetition\n" + f" \n" + f" The paragraphs comes before next paragraphs \\n" + f" whose description is delimited by triple backticks: ``` {task['after']}```\n" + f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n" + f" \n" + f" \n" + f"\n" + f" ") + + p = llm(template) + + return p + + +def create_index(wikilist: [str]): + """ + useful for creating the index of wikipages + """ + fetch = Wiki().fetch + + pages = [(title, fetch(title)) for title in wikilist if type(fetch(title)) != str] + texts = [] + chunk = 800 + for title, page in pages: + texts.append(WikiPage(title=title, fulltext=page.page_content)) + + doc_splitter = CharacterTextSplitter( + separator=".", + chunk_size=chunk, + chunk_overlap=100, + length_function=len, + ) + + paragraphs = texts[0].get_paragraphs(chunk=800) + + split_texts = 
[] + for p in paragraphs: + split_texts += doc_splitter.split_text(p) + + for split_text in split_texts: + assert type(split_text) == str + assert 0 < len(split_text) < 2 * 500 + + wiki_index = Chroma.from_texts(split_texts) + + return wiki_index + + +def get_wiki_paragraph(wiki_index, task: {}) -> str: + """useful to get a summary in one line from wiki index""" + + task_description = get_public_paragraph(task) + wiki_paragraphs = semantic_search(wiki_index, task_description) + text_content = "" + for p in wiki_paragraphs: + text_content += p.page_content + "/n/n" + + template = (f"\n" + f" Your task consists in generating a paragraph\\n" + f" whose description is delimited by triple backticks: ```{task['description']}```\n" + f"\n" + f" The text generation is based in the documents provided in these sections \n" + f" delimited by by triple backticks: ``` {text_content}``` \n" + f" The paragraph belongs at the top level of the hierarchy to a document \\n" + f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n" + f" Make sure that the paragraph relates the top level of the document\n" + f" \n" + f" The paragraph belongs to a higher paragraph in the hierarchy \\n" + f" whose description is delimited by triple backticks: ``` {task['above']}```\n" + f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n" + f" \n" + f" The paragraphs comes after previous paragraphs \\n" + f" whose description is delimited by triple backticks: ``` {task['before']}```\n" + f" Make sure that the paragraph relates with previous paragraph without any repetition\n" + f" \n" + f" The paragraphs comes before next paragraphs \\n" + f" whose description is delimited by triple backticks: ``` {task['after']}```\n" + f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n" + f" \n" + f" \n" + f"\n" + f" ") + + llm = openai_llm + p = llm(template) + + return p + + +def 
get_private_paragraph(texts, task: {}) -> str: + """useful to get a summary in one line from wiki index""" + + text_content = "" + for t in texts: + text_content += t + "/n/n" + + template = (f"\n" + f" Your task consists in generating a paragraph\\n" + f" whose description is delimited by triple backticks: ```{task['description']}```\n" + f"\n" + f" The text generation is based in the documents provided in these sections \n" + f" delimited by by triple backticks: ``` {text_content}``` \n" + f" The paragraph belongs at the top level of the hierarchy to a document \\n" + f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n" + f" Make sure that the paragraph relates the top level of the document\n" + f" \n" + f" The paragraph belongs to a higher paragraph in the hierarchy \\n" + f" whose description is delimited by triple backticks: ``` {task['above']}```\n" + f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n" + f" \n" + f" The paragraphs comes after previous paragraphs \\n" + f" whose description is delimited by triple backticks: ``` {task['before']}```\n" + f" Make sure that the paragraph relates with previous paragraph without any repetition\n" + f" \n" + f" The paragraphs comes before next paragraphs \\n" + f" whose description is delimited by triple backticks: ``` {task['after']}```\n" + f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n" + f" \n" + f" \n" + f"\n" + f" ") + + llm = openai_llm + p = llm(template) + + return p diff --git a/src/tools/llms.py b/src/tools/llms.py new file mode 100644 index 0000000000000000000000000000000000000000..2986fac41ade4484c88a50e68d12d80035e2b024 --- /dev/null +++ b/src/tools/llms.py @@ -0,0 +1,20 @@ + +from langchain.llms import OpenAI + +import os + + +OpenAI_KEY = "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" +os.environ["OPENAI_API_KEY"] = OpenAI_KEY +openai_llm = 
OpenAI(temperature=0) + +SERPAPI_API_KEY = "dba90c4ecfa942f37e2b9eb2e7c6600ef7fb5c02ab8bbfacef426773df14c06b" +os.environ["SERPAPI_API_KEY"] = SERPAPI_API_KEY + + +""" +HF_API_KEY = "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" +hf_llm = HuggingFaceHub(repo_id="google/flan-t5-small", + model_kwargs={"temperature": 0, "max_length": 1000}, + huggingfacehub_api_token=HF_API_KEY) +""" diff --git a/src/tools/semantic_db.py b/src/tools/semantic_db.py new file mode 100644 index 0000000000000000000000000000000000000000..961e94009cabcdc4355ac5a2707140421a5be5d6 --- /dev/null +++ b/src/tools/semantic_db.py @@ -0,0 +1,60 @@ +import chromadb +from datetime import datetime + +chroma_client = chromadb.Client() + + +def get_or_create_collection(coll_name: str): + date = coll_name[:6] + coll = chroma_client.get_or_create_collection(name=coll_name, metadata={"date": date}) + return coll + + +def get_collection(coll_name: str): + coll = chroma_client.get_collection(name=coll_name) + return coll + + +def reset_collection(coll_name: str): + coll = chroma_client.get_collection(name=coll_name) + coll.delete() + return coll + + +def delete_old_collections(old=2): + collections = chroma_client.list_collections() + current_hour = int(datetime.now().strftime("%m%d%H")) + + for coll in collections: + coll_hour = int(coll.metadata['date']) + if coll_hour < current_hour - old: + chroma_client.delete_collection(coll.name) + + +def add_texts_to_collection(coll_name: str, texts: [str], file: str, source: str): + """ + add texts to a collection : texts originate all from the same file + """ + coll = chroma_client.get_collection(name=coll_name) + filenames = [{file: 1, 'source': source} for _ in texts] + ids = [file+'-'+str(i) for i in range(len(texts))] + coll.delete(ids=ids) + coll.add(documents=texts, metadatas=filenames, ids=ids) + + +def delete_collection(coll_name: str): + chroma_client.delete_collection(name=coll_name) + + +def list_collections(): + return chroma_client.list_collections() + + +def 
query_collection(coll_name: str, query: str, from_files: [str], n_results: int = 4): + assert 0 < len(from_files) + coll = chroma_client.get_collection(name=coll_name) + where_ = [{file: 1} for file in from_files] + where_ = where_[0] if len(where_) == 1 else {'$or': where_} + n_results_ = min(n_results, coll.count()) + ans = coll.query(query_texts=query, n_results=n_results_, where=where_) + return ans diff --git a/src/tools/wiki.py b/src/tools/wiki.py new file mode 100644 index 0000000000000000000000000000000000000000..6022dc5ab7a8a0381706af790159404592f7f183 --- /dev/null +++ b/src/tools/wiki.py @@ -0,0 +1,61 @@ +from typing import Union + +from langchain.docstore.base import Docstore +from langchain.docstore.document import Document + + + +class Wiki(Docstore): + """ + Wrapper around wikipedia API. + """ + + def __init__(self) -> None: + """Check that wikipedia package is installed.""" + try: + import wikipedia # noqa: F401 + except ImportError: + raise ValueError( + "Could not import wikipedia python package. " + "Please install it with `pip install wikipedia`." + ) + + @staticmethod + def fetch(searched_page: str) -> Union[str, Document]: + """ + Try to fetch for wiki page. + + If page exists, return the page summary, and a PageWithLookups object. + If page does not exist, return similar entries. + """ + import wikipedia + + try: + # wikipedia.set_lang("fr") + page_content = wikipedia.page(searched_page).content + url = wikipedia.page(searched_page).url + result: Union[str, Document] = Document( + page_content=page_content, metadata={"page": url} + ) + except wikipedia.PageError: + result = f"Could not find [{searched_page}]. Similar: {wikipedia.search(searched_page)}" + + except wikipedia.DisambiguationError: + result = f"Could not find [{searched_page}]. 
Similar: {wikipedia.search(searched_page)}" + return result + + def search(searched_context: str) -> [str]: + """ + Finds wiki page title in relation with the given context + """ + import wikipedia + + try: + # wikipedia.set_lang("fr") + page_title_list = wikipedia.search(searched_context) + result = page_title_list + except wikipedia.PageError: + result = f"Could not find [{searched_context}]." + return result + + diff --git a/tests/.chroma/index/id_to_uuid_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl b/tests/.chroma/index/id_to_uuid_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl new file mode 100644 index 0000000000000000000000000000000000000000..cf81c7fe254a9ee5e428bb7cd8b6ec20c3ba1701 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74f5b108819cf0f64b68349537a233944b0a46f682c367c23f8ce581ed3cea8 +size 444 diff --git a/tests/.chroma/index/id_to_uuid_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl b/tests/.chroma/index/id_to_uuid_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d1e506596e4558cad5a7198ad230e009e7942d7f --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445b3f8d8a6b5cc676c01a5d1b4b67946a920fcce477e27e5b48cbdfeca61755 +size 446 diff --git a/tests/.chroma/index/id_to_uuid_37825327-eef6-4255-92ac-787c21197d77.pkl b/tests/.chroma/index/id_to_uuid_37825327-eef6-4255-92ac-787c21197d77.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0cad1158718cfa496ead953f2c678874f84dd875 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_37825327-eef6-4255-92ac-787c21197d77.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ee30e4022243d2b6b479c491ee60b3f5e7c539e3a362328642405f73cf8fee +size 695 diff --git 
a/tests/.chroma/index/id_to_uuid_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl b/tests/.chroma/index/id_to_uuid_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5bf62b2b962e6c4131b4db036dea114c747e06f3 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3997be16750f5f7b3978674fea2364fa6ab97c2dc426acb80eb1d422511443 +size 98 diff --git a/tests/.chroma/index/id_to_uuid_46204504-325f-47e6-9176-e2054080ad57.pkl b/tests/.chroma/index/id_to_uuid_46204504-325f-47e6-9176-e2054080ad57.pkl new file mode 100644 index 0000000000000000000000000000000000000000..37cf41bc21f74225784e896ec154179ff309dcec --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_46204504-325f-47e6-9176-e2054080ad57.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfaa528132e4e0652c83416114d1454c9b1d0b6e0fb1a8ef20b9734edb35323b +size 287 diff --git a/tests/.chroma/index/id_to_uuid_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl b/tests/.chroma/index/id_to_uuid_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d146577b49ab2c70f2eaf25e56f5c34787497ef7 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f833dce2979b320eaec2bb9154f769efb4c634e230149487dd7f221ecc25606 +size 447 diff --git a/tests/.chroma/index/id_to_uuid_69550299-be81-45fa-8bbf-3d83be2d7991.pkl b/tests/.chroma/index/id_to_uuid_69550299-be81-45fa-8bbf-3d83be2d7991.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e2d1b8fa92d408ce0aeca54ea7b7de0bb686092b --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_69550299-be81-45fa-8bbf-3d83be2d7991.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:22ba07768895e87dbf09941729a79568b660241f036aa4d8121353c2facf4bba +size 288 diff --git a/tests/.chroma/index/id_to_uuid_78f80853-f999-4f5e-b320-41c98bd28592.pkl b/tests/.chroma/index/id_to_uuid_78f80853-f999-4f5e-b320-41c98bd28592.pkl new file mode 100644 index 0000000000000000000000000000000000000000..56e6a70ebd1154a923e2abf1e24b282f2d6435b2 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_78f80853-f999-4f5e-b320-41c98bd28592.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c056689ec182e73b665df5dcf2e863da9c5e3445fe588071cb3cd689d318df46 +size 286 diff --git a/tests/.chroma/index/id_to_uuid_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl b/tests/.chroma/index/id_to_uuid_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl new file mode 100644 index 0000000000000000000000000000000000000000..fa64210da5870b2c08cf9571ed4cd260abf29c31 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91659f10e125e40759e9951d8ab50e05d48fe8b13eb602ba18ebf12c2f7ef216 +size 97 diff --git a/tests/.chroma/index/id_to_uuid_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl b/tests/.chroma/index/id_to_uuid_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl new file mode 100644 index 0000000000000000000000000000000000000000..446aca560c5702d04a6d8b79bd721f626892d0dc --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd8492d26db8287ca812566c52964275721c15d0c06fbc160aa130e70433873 +size 286 diff --git a/tests/.chroma/index/id_to_uuid_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl b/tests/.chroma/index/id_to_uuid_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9049320ffc41cceaf16113513f1a238a71579f83 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c62d35faab2d47684f6cdefe34f1ac5a45a300aee5eb85e3897c1a2651943675 +size 97 diff --git a/tests/.chroma/index/id_to_uuid_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl b/tests/.chroma/index/id_to_uuid_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl new file mode 100644 index 0000000000000000000000000000000000000000..51c386102d61a622a4b3137efdf7a73734aaa269 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e55e34d33c3c27775e0b6ccffd32c0f845ee8106fe96f667ba0804221d4b0d +size 98 diff --git a/tests/.chroma/index/id_to_uuid_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl b/tests/.chroma/index/id_to_uuid_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a7120f1fecea01810bea199c06b331ed3a1f49cd --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138806738237bc2498973bd85fe7493093ed4eaea0fa61bdcf22c15aeaf88da9 +size 441 diff --git a/tests/.chroma/index/id_to_uuid_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl b/tests/.chroma/index/id_to_uuid_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ca0c81cd567b0fbf2899dfac6f30b56592c654e3 --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb17c0f92ddc34e926384623c01a49f5fb2fe87a4605235402491faa8140af6 +size 97 diff --git a/tests/.chroma/index/id_to_uuid_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl b/tests/.chroma/index/id_to_uuid_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1d66f43ce9c8951c677d9a221c9f36aa09573ca3 --- /dev/null +++ 
b/tests/.chroma/index/id_to_uuid_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ff2265b5042692a7436434dc4ec86fda1045b264ef22325a633d7c310479b1 +size 441 diff --git a/tests/.chroma/index/id_to_uuid_bf57b36f-a918-4484-b897-79f751d5cad4.pkl b/tests/.chroma/index/id_to_uuid_bf57b36f-a918-4484-b897-79f751d5cad4.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ce0eb13002bffa3912cde4dee9c5ab55c5ef3f5c --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_bf57b36f-a918-4484-b897-79f751d5cad4.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfce13fc2f13ad950016bb48ccbbbd04ce13be07c748476dde1f940fcdfa0d52 +size 5 diff --git a/tests/.chroma/index/id_to_uuid_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl b/tests/.chroma/index/id_to_uuid_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9943f83d30c2f1506843b77cefe1b99362543f0f --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:276ff93bdb0f54dcda2d4bd238698fdf417d26879c578acb0520865cc1079631 +size 444 diff --git a/tests/.chroma/index/id_to_uuid_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl b/tests/.chroma/index/id_to_uuid_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ce0eb13002bffa3912cde4dee9c5ab55c5ef3f5c --- /dev/null +++ b/tests/.chroma/index/id_to_uuid_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfce13fc2f13ad950016bb48ccbbbd04ce13be07c748476dde1f940fcdfa0d52 +size 5 diff --git a/tests/.chroma/index/index_0c55a091-9f95-4a8d-b868-83d95412fdc4.bin b/tests/.chroma/index/index_0c55a091-9f95-4a8d-b868-83d95412fdc4.bin new file mode 100644 index 0000000000000000000000000000000000000000..4782c47e1315b5ad01f3330dc5873db0fde13f42 
--- /dev/null +++ b/tests/.chroma/index/index_0c55a091-9f95-4a8d-b868-83d95412fdc4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d +size 35444 diff --git a/tests/.chroma/index/index_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.bin b/tests/.chroma/index/index_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.bin new file mode 100644 index 0000000000000000000000000000000000000000..4782c47e1315b5ad01f3330dc5873db0fde13f42 --- /dev/null +++ b/tests/.chroma/index/index_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d +size 35444 diff --git a/tests/.chroma/index/index_37825327-eef6-4255-92ac-787c21197d77.bin b/tests/.chroma/index/index_37825327-eef6-4255-92ac-787c21197d77.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8f14f8a0f52cbd3093957a0c60ff02a061ce974 --- /dev/null +++ b/tests/.chroma/index/index_37825327-eef6-4255-92ac-787c21197d77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd0348e194c5b4661616aab5d4f4d1847f2a0bd5d5295b6230075a7bfb000ad +size 35444 diff --git a/tests/.chroma/index/index_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.bin b/tests/.chroma/index/index_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ede9d93d5cb7070d39a18dcfd90671af045e558 --- /dev/null +++ b/tests/.chroma/index/index_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c915470fb0aee4ad80d1313da2ba6caae6ee813ffb23c4a3a4d73dd610e492c2 +size 3456 diff --git a/tests/.chroma/index/index_46204504-325f-47e6-9176-e2054080ad57.bin b/tests/.chroma/index/index_46204504-325f-47e6-9176-e2054080ad57.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c761e98d0458c56b78a213c367e8e174b6cb31a --- /dev/null +++ 
b/tests/.chroma/index/index_46204504-325f-47e6-9176-e2054080ad57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b7bcd06e5d6cc6c260fe5da47466cfc406a9e294063c5033210b435e5e69b6 +size 13604 diff --git a/tests/.chroma/index/index_64afc7c0-c153-47d1-af52-55e1738ae76c.bin b/tests/.chroma/index/index_64afc7c0-c153-47d1-af52-55e1738ae76c.bin new file mode 100644 index 0000000000000000000000000000000000000000..4782c47e1315b5ad01f3330dc5873db0fde13f42 --- /dev/null +++ b/tests/.chroma/index/index_64afc7c0-c153-47d1-af52-55e1738ae76c.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d +size 35444 diff --git a/tests/.chroma/index/index_69550299-be81-45fa-8bbf-3d83be2d7991.bin b/tests/.chroma/index/index_69550299-be81-45fa-8bbf-3d83be2d7991.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c761e98d0458c56b78a213c367e8e174b6cb31a --- /dev/null +++ b/tests/.chroma/index/index_69550299-be81-45fa-8bbf-3d83be2d7991.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b7bcd06e5d6cc6c260fe5da47466cfc406a9e294063c5033210b435e5e69b6 +size 13604 diff --git a/tests/.chroma/index/index_78f80853-f999-4f5e-b320-41c98bd28592.bin b/tests/.chroma/index/index_78f80853-f999-4f5e-b320-41c98bd28592.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c761e98d0458c56b78a213c367e8e174b6cb31a --- /dev/null +++ b/tests/.chroma/index/index_78f80853-f999-4f5e-b320-41c98bd28592.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b7bcd06e5d6cc6c260fe5da47466cfc406a9e294063c5033210b435e5e69b6 +size 13604 diff --git a/tests/.chroma/index/index_90d6076c-bb50-40ed-90a1-2df2243fd12e.bin b/tests/.chroma/index/index_90d6076c-bb50-40ed-90a1-2df2243fd12e.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ede9d93d5cb7070d39a18dcfd90671af045e558 --- /dev/null +++ 
b/tests/.chroma/index/index_90d6076c-bb50-40ed-90a1-2df2243fd12e.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c915470fb0aee4ad80d1313da2ba6caae6ee813ffb23c4a3a4d73dd610e492c2 +size 3456 diff --git a/tests/.chroma/index/index_a10bf13e-424a-41cd-bcfb-27d8072711ea.bin b/tests/.chroma/index/index_a10bf13e-424a-41cd-bcfb-27d8072711ea.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c761e98d0458c56b78a213c367e8e174b6cb31a --- /dev/null +++ b/tests/.chroma/index/index_a10bf13e-424a-41cd-bcfb-27d8072711ea.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b7bcd06e5d6cc6c260fe5da47466cfc406a9e294063c5033210b435e5e69b6 +size 13604 diff --git a/tests/.chroma/index/index_a6f9bfcf-0593-40b1-a282-a54d5b75d939.bin b/tests/.chroma/index/index_a6f9bfcf-0593-40b1-a282-a54d5b75d939.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ede9d93d5cb7070d39a18dcfd90671af045e558 --- /dev/null +++ b/tests/.chroma/index/index_a6f9bfcf-0593-40b1-a282-a54d5b75d939.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c915470fb0aee4ad80d1313da2ba6caae6ee813ffb23c4a3a4d73dd610e492c2 +size 3456 diff --git a/tests/.chroma/index/index_aba244c9-042f-42a3-860c-a68e1ee0b4a5.bin b/tests/.chroma/index/index_aba244c9-042f-42a3-860c-a68e1ee0b4a5.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ede9d93d5cb7070d39a18dcfd90671af045e558 --- /dev/null +++ b/tests/.chroma/index/index_aba244c9-042f-42a3-860c-a68e1ee0b4a5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c915470fb0aee4ad80d1313da2ba6caae6ee813ffb23c4a3a4d73dd610e492c2 +size 3456 diff --git a/tests/.chroma/index/index_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.bin b/tests/.chroma/index/index_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.bin new file mode 100644 index 0000000000000000000000000000000000000000..4782c47e1315b5ad01f3330dc5873db0fde13f42 --- /dev/null +++ 
b/tests/.chroma/index/index_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d +size 35444 diff --git a/tests/.chroma/index/index_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.bin b/tests/.chroma/index/index_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ede9d93d5cb7070d39a18dcfd90671af045e558 --- /dev/null +++ b/tests/.chroma/index/index_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c915470fb0aee4ad80d1313da2ba6caae6ee813ffb23c4a3a4d73dd610e492c2 +size 3456 diff --git a/tests/.chroma/index/index_bcb0093e-68dd-4d75-a758-63ef7a681d92.bin b/tests/.chroma/index/index_bcb0093e-68dd-4d75-a758-63ef7a681d92.bin new file mode 100644 index 0000000000000000000000000000000000000000..4782c47e1315b5ad01f3330dc5873db0fde13f42 --- /dev/null +++ b/tests/.chroma/index/index_bcb0093e-68dd-4d75-a758-63ef7a681d92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d +size 35444 diff --git a/tests/.chroma/index/index_bf57b36f-a918-4484-b897-79f751d5cad4.bin b/tests/.chroma/index/index_bf57b36f-a918-4484-b897-79f751d5cad4.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb7d3ad274d65acce8d78c1144f56bc14d1af4e3 --- /dev/null +++ b/tests/.chroma/index/index_bf57b36f-a918-4484-b897-79f751d5cad4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5155284604da2e108decd5c55a79ba1d816c0952c2b7c5746894c87596cf5a75 +size 3456 diff --git a/tests/.chroma/index/index_e208b245-d2cd-4069-9a8c-d5f010d91afb.bin b/tests/.chroma/index/index_e208b245-d2cd-4069-9a8c-d5f010d91afb.bin new file mode 100644 index 0000000000000000000000000000000000000000..4782c47e1315b5ad01f3330dc5873db0fde13f42 --- /dev/null +++ 
b/tests/.chroma/index/index_e208b245-d2cd-4069-9a8c-d5f010d91afb.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89aef35a2393395327581f3629478d4db7b6fd820f317ed13e60d05b4bdcd30d +size 35444 diff --git a/tests/.chroma/index/index_f09229bd-8639-49e8-8a84-8e6e0aa11971.bin b/tests/.chroma/index/index_f09229bd-8639-49e8-8a84-8e6e0aa11971.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb7d3ad274d65acce8d78c1144f56bc14d1af4e3 --- /dev/null +++ b/tests/.chroma/index/index_f09229bd-8639-49e8-8a84-8e6e0aa11971.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5155284604da2e108decd5c55a79ba1d816c0952c2b7c5746894c87596cf5a75 +size 3456 diff --git a/tests/.chroma/index/index_metadata_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl b/tests/.chroma/index/index_metadata_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl new file mode 100644 index 0000000000000000000000000000000000000000..59f0becd429a2bb40ab8c54d358f62bc4a7a8fac --- /dev/null +++ b/tests/.chroma/index/index_metadata_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28efcb1285fa049b20579f4251562dc12a34928390bb51f8721f235a6cd5962a +size 73 diff --git a/tests/.chroma/index/index_metadata_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl b/tests/.chroma/index/index_metadata_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl new file mode 100644 index 0000000000000000000000000000000000000000..aaff35c8cb1f63b648b3e5aa3341cc234b24f0d3 --- /dev/null +++ b/tests/.chroma/index/index_metadata_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf5248ca7be6d2d19bbe198727d8f209515c1bcff5fc317067d62e6eff32dc92 +size 73 diff --git a/tests/.chroma/index/index_metadata_37825327-eef6-4255-92ac-787c21197d77.pkl b/tests/.chroma/index/index_metadata_37825327-eef6-4255-92ac-787c21197d77.pkl new file mode 100644 index 
0000000000000000000000000000000000000000..b2ba50fd7af7bda5aeaf7961f5d2963edb2f1c56 --- /dev/null +++ b/tests/.chroma/index/index_metadata_37825327-eef6-4255-92ac-787c21197d77.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b836c4897884930537c8711eb5054fa52c19747a69aa462980f20c77df0ce9 +size 73 diff --git a/tests/.chroma/index/index_metadata_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl b/tests/.chroma/index/index_metadata_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9c672419108b4ebded53bdc78192704418ce8af7 --- /dev/null +++ b/tests/.chroma/index/index_metadata_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4f8c356d231e61b148f01d724b86cc7198b1681023ddf46beccba7f8f1786e +size 73 diff --git a/tests/.chroma/index/index_metadata_46204504-325f-47e6-9176-e2054080ad57.pkl b/tests/.chroma/index/index_metadata_46204504-325f-47e6-9176-e2054080ad57.pkl new file mode 100644 index 0000000000000000000000000000000000000000..df7362bba7a97e9601cc44b4f71043cca574c797 --- /dev/null +++ b/tests/.chroma/index/index_metadata_46204504-325f-47e6-9176-e2054080ad57.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15f05f0efbd2c77ba3a075ccd20278e63b62247fd847e41f99d84cd107c91be +size 73 diff --git a/tests/.chroma/index/index_metadata_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl b/tests/.chroma/index/index_metadata_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl new file mode 100644 index 0000000000000000000000000000000000000000..675599d0e818a1c66330a5ada0b15874ada44411 --- /dev/null +++ b/tests/.chroma/index/index_metadata_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6046136433188fa8ae30ae0a8bff868332e1d53222d08c2c63113ee96c14a3 +size 73 diff --git a/tests/.chroma/index/index_metadata_69550299-be81-45fa-8bbf-3d83be2d7991.pkl 
b/tests/.chroma/index/index_metadata_69550299-be81-45fa-8bbf-3d83be2d7991.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5261cd15a3c7e20db6f86794d14ad7340ac8b19c --- /dev/null +++ b/tests/.chroma/index/index_metadata_69550299-be81-45fa-8bbf-3d83be2d7991.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da66b45af41c9161083ff737f23055ac315e8e8c00fa52099f903aaf452dc276 +size 73 diff --git a/tests/.chroma/index/index_metadata_78f80853-f999-4f5e-b320-41c98bd28592.pkl b/tests/.chroma/index/index_metadata_78f80853-f999-4f5e-b320-41c98bd28592.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6e5fd9c0f0bc9fc82f498a0e088f8bca75b14aa3 --- /dev/null +++ b/tests/.chroma/index/index_metadata_78f80853-f999-4f5e-b320-41c98bd28592.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:963a70c98ab995ef14fd1826da8c15512b87178bc05a8eb666127b9b0186d7e0 +size 73 diff --git a/tests/.chroma/index/index_metadata_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl b/tests/.chroma/index/index_metadata_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl new file mode 100644 index 0000000000000000000000000000000000000000..7b0a46afd718f91d2f33272fc9388ed67f4c34ac --- /dev/null +++ b/tests/.chroma/index/index_metadata_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4278b650a0ae31e1911b9270df72fe120da32150338e179197c42c0d5c2b5a2 +size 73 diff --git a/tests/.chroma/index/index_metadata_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl b/tests/.chroma/index/index_metadata_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl new file mode 100644 index 0000000000000000000000000000000000000000..913ee579099c397ced636f0a6a668f93d52d014a --- /dev/null +++ b/tests/.chroma/index/index_metadata_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca97d33ceb2bb6eebd2bb592c646beab75458e92fb047516c2cf65903f849ff8 +size 73 diff --git 
a/tests/.chroma/index/index_metadata_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl b/tests/.chroma/index/index_metadata_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5204ff5869c104a681deee8be8f6dfa2f47b4128 --- /dev/null +++ b/tests/.chroma/index/index_metadata_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8667416de3b36ae688b9da32b804151b2c0040d0e83f0348648a8c338c4eb85 +size 73 diff --git a/tests/.chroma/index/index_metadata_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl b/tests/.chroma/index/index_metadata_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6e77e4833fa49265024591a0cac468256d2a885e --- /dev/null +++ b/tests/.chroma/index/index_metadata_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9021164e35723402480efbca8d4abbd514ad88606a0455803d1a2160f01e5c1a +size 73 diff --git a/tests/.chroma/index/index_metadata_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl b/tests/.chroma/index/index_metadata_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c483945af3348296585a86c7f0d9051554b547b7 --- /dev/null +++ b/tests/.chroma/index/index_metadata_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b8b5f1e6c5fcbf3f488c589674b8afb5bb2016498bf2225387e443864993fa +size 73 diff --git a/tests/.chroma/index/index_metadata_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl b/tests/.chroma/index/index_metadata_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl new file mode 100644 index 0000000000000000000000000000000000000000..deebc8afaa25dbd4266a21fd4b56ac590c04f779 --- /dev/null +++ b/tests/.chroma/index/index_metadata_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:61f473364e2592b5a4a456542705123b6abd3661b8ea6c20939b3ae53edebec7 +size 73 diff --git a/tests/.chroma/index/index_metadata_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl b/tests/.chroma/index/index_metadata_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a8a8e12dd2b85b3e90bf56eb621ad026c5dbae33 --- /dev/null +++ b/tests/.chroma/index/index_metadata_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c33d94b8157cc2b6f1ed54d7a0c441b5928fb8763d986586b71ebf188a279bed +size 73 diff --git a/tests/.chroma/index/index_metadata_bf57b36f-a918-4484-b897-79f751d5cad4.pkl b/tests/.chroma/index/index_metadata_bf57b36f-a918-4484-b897-79f751d5cad4.pkl new file mode 100644 index 0000000000000000000000000000000000000000..629532517dd586fd8cb4a38e35c9152fc3e1a821 --- /dev/null +++ b/tests/.chroma/index/index_metadata_bf57b36f-a918-4484-b897-79f751d5cad4.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe29c5e286211398680f49d19e0402b898c3fc16536261ab4e892e0e53db7792 +size 73 diff --git a/tests/.chroma/index/index_metadata_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl b/tests/.chroma/index/index_metadata_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl new file mode 100644 index 0000000000000000000000000000000000000000..348aba733f351568dec955be3915d4c8b645330e --- /dev/null +++ b/tests/.chroma/index/index_metadata_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c382d30dec7ac791c622adc6098d63faecf9503bad0533222013509e3cfa24 +size 73 diff --git a/tests/.chroma/index/index_metadata_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl b/tests/.chroma/index/index_metadata_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5a85ca92edc9287f381fc48504d4486ef9224c64 --- /dev/null +++ 
b/tests/.chroma/index/index_metadata_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf95768550818eb4f50656182fa1c17c5bdc7966df862fcd21d07eaa23d0457 +size 73 diff --git a/tests/.chroma/index/uuid_to_id_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl b/tests/.chroma/index/uuid_to_id_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1d317d362dc2a37480855344cd12bcd322afc510 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_0c55a091-9f95-4a8d-b868-83d95412fdc4.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a58d77f48bc03440f4b8b24f73f85b853aa0de8caccc2a5c8924050e66b9128 +size 497 diff --git a/tests/.chroma/index/uuid_to_id_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl b/tests/.chroma/index/uuid_to_id_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl new file mode 100644 index 0000000000000000000000000000000000000000..fdabe6676e6b80fbe333f5ff5dc1aa0e666e3170 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_31dac11a-6e77-49ca-a1b5-fce9e3fe275a.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a1d2e4af1e0cb4ca4085da3bdecc3ee6af068ba2996bc9d88d5582d56d927f +size 497 diff --git a/tests/.chroma/index/uuid_to_id_37825327-eef6-4255-92ac-787c21197d77.pkl b/tests/.chroma/index/uuid_to_id_37825327-eef6-4255-92ac-787c21197d77.pkl new file mode 100644 index 0000000000000000000000000000000000000000..26c4626e56be0f29aac045370eed3dc6158e6bb1 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_37825327-eef6-4255-92ac-787c21197d77.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb762abe0c7028b254430e1ce59f4e455c1458071cd85bab21944f5a10e6953 +size 793 diff --git a/tests/.chroma/index/uuid_to_id_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl b/tests/.chroma/index/uuid_to_id_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl new file mode 100644 index 
0000000000000000000000000000000000000000..2b5e6db74c60626151c3d6598fad84e384e41368 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_40ba1a00-ce47-4e51-a2d3-56eb96ecb82b.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aa8f825991d4c006aaf910ae46d3dbf2ceef7b1aa4a0ffb94363660e75646a6 +size 90 diff --git a/tests/.chroma/index/uuid_to_id_46204504-325f-47e6-9176-e2054080ad57.pkl b/tests/.chroma/index/uuid_to_id_46204504-325f-47e6-9176-e2054080ad57.pkl new file mode 100644 index 0000000000000000000000000000000000000000..900c81c55c939c331b7cadbf21677d0b6346d600 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_46204504-325f-47e6-9176-e2054080ad57.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0811ddf730f6ac2cff2a06e2fee2db435d37d7e7869e1f9b6c2c158692837a5 +size 312 diff --git a/tests/.chroma/index/uuid_to_id_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl b/tests/.chroma/index/uuid_to_id_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3c5f3fca4c7ec10b832f9e51ab231dd3a1c8da58 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_64afc7c0-c153-47d1-af52-55e1738ae76c.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11623155ec96d2c529de904c181e86bdcd109999d2c0b3c8a6894af4efd3b071 +size 497 diff --git a/tests/.chroma/index/uuid_to_id_69550299-be81-45fa-8bbf-3d83be2d7991.pkl b/tests/.chroma/index/uuid_to_id_69550299-be81-45fa-8bbf-3d83be2d7991.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d1536df94d4a226488ced6efa3f8606d2ac588f9 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_69550299-be81-45fa-8bbf-3d83be2d7991.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b6783fbde40a5014c9954bbba376dbe44915052f6de0b7741790fe3c6061930 +size 312 diff --git a/tests/.chroma/index/uuid_to_id_78f80853-f999-4f5e-b320-41c98bd28592.pkl 
b/tests/.chroma/index/uuid_to_id_78f80853-f999-4f5e-b320-41c98bd28592.pkl new file mode 100644 index 0000000000000000000000000000000000000000..880049c405a2c1e302972de7e9db20d2da675ee3 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_78f80853-f999-4f5e-b320-41c98bd28592.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc24223d5e239a7fd0c446db7649a14dc0f545b494ab92c1230136093356db5 +size 312 diff --git a/tests/.chroma/index/uuid_to_id_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl b/tests/.chroma/index/uuid_to_id_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl new file mode 100644 index 0000000000000000000000000000000000000000..873dd6a2c0220d9c9c32ee628efdbc92682acbe0 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_90d6076c-bb50-40ed-90a1-2df2243fd12e.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8312399831bb82a8d7350325ac66fd0d3fc6f7da7bb9724fb43afcf6c8782e3 +size 90 diff --git a/tests/.chroma/index/uuid_to_id_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl b/tests/.chroma/index/uuid_to_id_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d1ddf0f4c2835fa99a6def83f25a5c627d11711c --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_a10bf13e-424a-41cd-bcfb-27d8072711ea.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d773e9ec6b3780135bb361c81d9960376203b2bb200a85b1fe92c5e8ae39ae5 +size 312 diff --git a/tests/.chroma/index/uuid_to_id_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl b/tests/.chroma/index/uuid_to_id_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl new file mode 100644 index 0000000000000000000000000000000000000000..24b4451326f76c0835a737d67693baface39c48c --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_a6f9bfcf-0593-40b1-a282-a54d5b75d939.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fedfe23c98950bc198f52e03ada748812e513243d1d6a0f96a3ac5b78cb94b6 +size 90 diff --git 
a/tests/.chroma/index/uuid_to_id_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl b/tests/.chroma/index/uuid_to_id_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2f47302c1af1a8af41f531da675ac4804b2cb9a4 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_aba244c9-042f-42a3-860c-a68e1ee0b4a5.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f4a563960b1894011d3101bc6499f4f2958b52d434fa7829eac91f537d9e27 +size 90 diff --git a/tests/.chroma/index/uuid_to_id_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl b/tests/.chroma/index/uuid_to_id_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e653e7377cc1b376927a34dcc1d69c61b77050f7 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_afc3d29f-a033-4bcf-9ef4-e93b6211ac95.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f55ebae50c1f9aea591f93af0c25160347e34e41d933cb4bc51d6ddfa8e5b6 +size 497 diff --git a/tests/.chroma/index/uuid_to_id_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl b/tests/.chroma/index/uuid_to_id_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl new file mode 100644 index 0000000000000000000000000000000000000000..260c22fe2e3010154d4284a771e1137b7df41139 --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_b5e184d4-5839-4b0b-9bd8-638fa6bc080a.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2be58a5db1b8616a72df3f6db15621285d65e7bfdb7c7b51425282ab072386 +size 90 diff --git a/tests/.chroma/index/uuid_to_id_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl b/tests/.chroma/index/uuid_to_id_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl new file mode 100644 index 0000000000000000000000000000000000000000..85f727f474d3c386043a38b2ea113f4cb273893f --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_bcb0093e-68dd-4d75-a758-63ef7a681d92.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0419a3499f44f93d4641b8c9bc4aa876fbf4db76871e2b36e776d03706dd3f07 +size 497 diff --git a/tests/.chroma/index/uuid_to_id_bf57b36f-a918-4484-b897-79f751d5cad4.pkl b/tests/.chroma/index/uuid_to_id_bf57b36f-a918-4484-b897-79f751d5cad4.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ce0eb13002bffa3912cde4dee9c5ab55c5ef3f5c --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_bf57b36f-a918-4484-b897-79f751d5cad4.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfce13fc2f13ad950016bb48ccbbbd04ce13be07c748476dde1f940fcdfa0d52 +size 5 diff --git a/tests/.chroma/index/uuid_to_id_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl b/tests/.chroma/index/uuid_to_id_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl new file mode 100644 index 0000000000000000000000000000000000000000..659f458503807ff3887e56fc142cd58e2fa126ae --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_e208b245-d2cd-4069-9a8c-d5f010d91afb.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d16e4271960e78ad2d53cd999f0c2ec6b0bc0b9b151e196291397a365b1f79 +size 497 diff --git a/tests/.chroma/index/uuid_to_id_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl b/tests/.chroma/index/uuid_to_id_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ce0eb13002bffa3912cde4dee9c5ab55c5ef3f5c --- /dev/null +++ b/tests/.chroma/index/uuid_to_id_f09229bd-8639-49e8-8a84-8e6e0aa11971.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfce13fc2f13ad950016bb48ccbbbd04ce13be07c748476dde1f940fcdfa0d52 +size 5 diff --git a/tests/__pycache__/test_agent.cpython-311-pytest-7.3.1.pyc b/tests/__pycache__/test_agent.cpython-311-pytest-7.3.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..331be1caccdff318bcdc39a88f776a5a2913907c Binary files /dev/null and b/tests/__pycache__/test_agent.cpython-311-pytest-7.3.1.pyc differ diff --git 
a/tests/__pycache__/test_input_file.cpython-311-pytest-7.3.1.pyc b/tests/__pycache__/test_input_file.cpython-311-pytest-7.3.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56c3cff4c6b320848193712cae36f2a5eee8a274 Binary files /dev/null and b/tests/__pycache__/test_input_file.cpython-311-pytest-7.3.1.pyc differ diff --git a/tests/__pycache__/test_semantic.cpython-311-pytest-7.3.1.pyc b/tests/__pycache__/test_semantic.cpython-311-pytest-7.3.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ee4a1987cee42c160e91cccb3eff1225cbf0b81 Binary files /dev/null and b/tests/__pycache__/test_semantic.cpython-311-pytest-7.3.1.pyc differ diff --git a/tests/__pycache__/test_tools.cpython-311-pytest-7.3.1.pyc b/tests/__pycache__/test_tools.cpython-311-pytest-7.3.1.pyc new file mode 100644 index 0000000000000000000000000000000000000000..37dd18251528ca919f109ec1d127d951f2779add Binary files /dev/null and b/tests/__pycache__/test_tools.cpython-311-pytest-7.3.1.pyc differ diff --git a/tests/test_input_file.py b/tests/test_input_file.py new file mode 100644 index 0000000000000000000000000000000000000000..173e97c4ad5db6343d64e253da1abc60c2ae1606 --- /dev/null +++ b/tests/test_input_file.py @@ -0,0 +1,27 @@ +from src.model.document import InputDoc + +short_example = "short example" + +title_example = "!!this is about Aldo Moro" + + +def test_short_example(): + text = short_example + assert InputDoc(fulltext=text).container.normals[0].text == text + + +def test_title_example(): + text = title_example + doc = InputDoc(fulltext=text) + assert doc.title == text.split('!!')[1] + assert doc.container.title == title_example.split('!!')[1] + assert not doc.container.children + + +def test_long_example(): + text = open("../data/long_example.txt", "r").read() + doc = InputDoc(fulltext=text) + tasks = doc.tasks + assert doc.title == "terrorism in Italy in the years 70 and 80" + assert doc.container.title == "terrorism in Italy in the years 70 and 
80" + assert len(doc.container.children) == 2 diff --git a/tests/test_semantic.py b/tests/test_semantic.py new file mode 100644 index 0000000000000000000000000000000000000000..8c184eade3ea1ba38df920fd69c08fb538f9f583 --- /dev/null +++ b/tests/test_semantic.py @@ -0,0 +1,122 @@ +from src.tools.semantic_db import * +from src.control.control import get_long_id + +texts_aldo = ["Aldo Moro [ˈaldo ˈmɔːro], né le 23 septembre 1916 à Maglie et mort assassiné le 9 mai 1978 à Rome " + "(ou ses environs), est un homme d'État italien membre de la Démocratie chrétienne (DC) Pendant la Seconde " + "Guerre mondiale, il est professeur de droit pénal.", + "Il est élu député en 1946, entre pour la première fois au " + "gouvernement en 1955 et dirige la Démocratie chrétienne entre 1959 et 1963. Il exerce deux fois les fonctions " + "de président du Conseil des ministres d'Italie (1963-1968 puis 1974-1976) et dirige par deux fois la diplomatie " + "italienne. Partisan du « compromis historique » entre les chrétiens-démocrates et les communistes, il est " + "enlevé en mars 1978 par les Brigades rouges.", + "Il est séquestré 55 jours et finalement assassiné par ses " + "geôliers. Les conditions de sa mort et l'incapacité des autorités de l'époque à le sauver restent des sujets " + "polémiques dans la classe politique et les médias italiens", + "Professeur de droit pénal à la faculté de droit de l'université de Bari à partir de 1940, Aldo Moro rejoint, " + "en 1941, la FUCI (Fédération universitaire des catholiques italiens) et en devient le président.", + "Après la " + "Seconde Guerre mondiale, il est élu à l'Assemblée constituante en 1946. Moro participe à la rédaction de la " + "nouvelle constitution. 
Il est ensuite réélu comme député à la Chambre des députés en 1948 pour y servir" + "jusqu'à sa mort.", + "Il est sous-secrétaire d'État aux Affaires étrangères (1948-1950), garde des sceaux, " + "ministre de la Justice (1955-1957), de l'Éducation nationale (1957-1958) et des Affaires étrangères " + "(1969-1972 et 1973-1974). Il est secrétaire de la Démocratie chrétienne de 1960 à 1963.", + "À partir de décembre " + "1963 et durant 4 ans et demi, il dirige le premier gouvernement italien intégrant des ministres socialistes.", + "Le centre gauche entre en crise à la fin des années 1960, affaiblissant le poids de Moro dans son parti. " + "Juriste renommé, il a été professeur de droit et de procédure pénale à la faculté de sciences politiques de " + "l'université de Rome « La Sapienza » de 1960 jusqu'à la mort." + ] + +texts_platini = ["Michel Platini, né le 21 juin 1955 à Jœuf (Meurthe-et-Moselle), est un footballeur international " + "français évoluant au poste de milieu de terrain du début des années 1970 jusqu'en 1987, avant de " + "devenir sélectionneur puis dirigeant sportif.", + "Meneur de jeu emblématique de l'équipe de France de 1976 à 1987, et en club, de l'AS Nancy-Lorraine, " + "de l'AS Saint-Étienne, puis de la Juventus de Turin, et auteur de 468 buts durant sa carrière," + "Michel Platini est considéré comme un des meilleurs joueurs de football de l'histoire.", + "Le magazine France Football le désigne meilleur footballeur français du xxe siècle, devant Zinédine " + "Zidane et Raymond Kopa, tandis que la Juventus de Turin l'a élu meilleur « Bianconero » de tous les " + "temps.", + "Il est le premier footballeur à remporter le trophée du Ballon d'or trois fois consécutivement entre " + "1983 et 1985 (record battu par Lionel Messi remportant le trophée quatre fois de suite entre 2009 et " + "2012).", + "Il fait partie de l'équipe mondiale du xxe siècle établie par la Fédération internationale de football" + " association (FIFA) en 1998."] + +query = "Quand 
est mort Aldo Moro?" + +coll_name = get_long_id(-1) +today = "0531" + + +def test_long_id(): + long_id = get_long_id(-1) + + assert long_id[:4] == today + + +def test_create_collection(): + coll = get_or_create_collection(coll_name, coll_name[:6]) + assert coll.name == coll_name + assert coll.count() == 0 + assert coll.metadata['date'] == coll_name[:6] + assert len(list_collections()) == 1 + + +def test_add_texts(): + coll = get_or_create_collection(coll_name, coll_name[:6]) + add_texts_to_collection(coll_name=coll_name, texts=texts_aldo, file="aldo") + assert coll.name == coll_name + assert coll.count() == 8 + assert coll.metadata['date'] == coll_name[:6] + assert len(list_collections()) == 1 + + +def test_reset_collection(): + coll = get_or_create_collection(coll_name, coll_name[:6]) + reset_collection(coll.name) + assert coll.count() == 0 + assert len(list_collections()) == 1 + + +def test_delete_collection(): + coll = get_or_create_collection(coll_name, coll_name[:6]) + delete_collection(coll.name) + assert len(list_collections()) == 0 + + +def test_delete_old_collections(): + get_or_create_collection(coll_name, coll_name[:6]) + get_or_create_collection("old_collection", '043012') + delete_old_collections() + count__ = len(list_collections()) + assert count__ == 1 + + +def test_query_collection(): + coll = get_or_create_collection(coll_name, coll_name[:6]) + add_texts_to_collection(coll_name=coll_name, texts=texts_aldo, file="aldo") + answers = query_collection(coll.name, query_=query, from_files=["aldo"], n_results=2) + assert len(answers['documents'][0]) == 2 + answers = query_collection(coll.name, query_=query, from_files=["aldo"], n_results=3) + assert len(answers['documents'][0]) == 3 + +def test_query_collection_limit(): + coll = get_or_create_collection(coll_name, coll_name[:6]) + add_texts_to_collection(coll_name=coll_name, texts=texts_aldo, file="aldo") + add_texts_to_collection(coll_name=coll_name, texts=texts_platini, file="platini") + answers = 
query_collection(coll.name, query_=query, from_files=["aldo"], n_results=10) + assert len(answers['documents'][0]) == 8 + + +def test_query_collection_no_answer(): + coll = get_or_create_collection(coll_name, coll_name[:6]) + answers = query_collection(coll.name, query_=query, from_files=["platini"], n_results=2) + assert len(answers['documents'][0]) == 2 + + +def test_query_collection_no_answer(): + coll = get_or_create_collection(coll_name, coll_name[:6]) + answers = query_collection(coll.name, query_=query, from_files=["platini", "aldo"], n_results=2) + assert len(answers['documents'][0]) == 2 + assert answers['metadatas'][0][0] == {'aldo': 1}, {'aldo': 1} \ No newline at end of file diff --git a/tmp_input/06011605-ixzs5jwnb9/onetask_example.txt b/tmp_input/06011605-ixzs5jwnb9/onetask_example.txt new file mode 100644 index 0000000000000000000000000000000000000000..c896f7eff51466651180349fdfd968234b0def6d --- /dev/null +++ b/tmp_input/06011605-ixzs5jwnb9/onetask_example.txt @@ -0,0 +1,7 @@ +!! terrorism in Italy in the years 70 and 80 + +# what happened + +## the killing of Aldo Moro +?? who killed Aldo Moro and why? (around 100 words) +