|
|
""" |
|
|
TODO: add a boolean to switch llms |
|
|
""" |
|
|
|
|
|
|
|
|
import json |
|
|
import string |
|
|
import openai |
|
|
|
|
|
import wikipedia |
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
from langchain.prompts import PromptTemplate |
|
|
from langchain.chains import LLMChain |
|
|
from src.domain.block import Block |
|
|
from src.llm.llms import openai_llm |
|
|
from src.tools.wiki import Wiki |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_wikilist(task: dict) -> list:
    """Return Wikipedia page titles that may help write the paragraph of ``task``.

    The LLM is asked for a JSON list of page titles matching
    ``task['description']``. Each proposed title is then passed to
    ``wikipedia.search`` and all hits are merged without duplicates.

    Args:
        task: mapping that must contain a ``'description'`` entry.

    Returns:
        The de-duplicated search hits, in first-seen order.

    Raises:
        KeyError: if ``task`` has no ``'description'`` entry.
    """
    llm = openai_llm
    template = (f"\n"
                f" Your task consists in finding the list of wikipedia page titles which provide useful content "
                f" for a paragraph whose description is delimited by triple backticks: ```{task['description']}```\n"
                f" "
                f" Make sure that you provide no more than 10 elements and that the list is actually finished."
                f" Format your response as a valid JSON list of strings separated by commas.\n"
                f" \n"
                f" ")

    llm_list = llm.invoke(template)
    try:
        wikilist = json.loads(llm_list)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; catching only that
        # keeps KeyboardInterrupt/SystemExit and programming errors visible.
        print("json loads failed with" + llm_list)
        # Best-effort fallback: treat the raw answer as comma-separated titles.
        wikilist = list(llm_list.split(','))

    if isinstance(wikilist, str):
        # A lone JSON string would otherwise be iterated character by character.
        wikilist = [wikilist]

    # Second positional argument of wikipedia.search (hits requested per title).
    expand_factor = 2
    expanded_wikilist = []
    for wikipage in wikilist:
        expanded_wikilist += wikipedia.search(wikipage, expand_factor)

    # dict.fromkeys removes duplicates like set() but keeps a stable order.
    return list(dict.fromkeys(expanded_wikilist))
|
|
|
|
|
|
|
|
def extract_list(llm_list: str) -> list:
    """Extract the quoted items from a list rendered as text.

    The first and last characters (expected to be the enclosing brackets) are
    dropped, the remainder is split on double quotes, and only fragments that
    look like words are kept.

    Args:
        llm_list: textual list, e.g. ``'["Paris", "France"]'``.

    Returns:
        The retained fragments in order of appearance, or ``[]`` when
        ``llm_list`` cannot be sliced and split like a string.
    """

    def filter_(el: str) -> bool:
        # Keep fragments longer than 2 characters in which strictly more than
        # three quarters of the characters are ASCII letters; this discards
        # the empty strings and ", " separators produced by the split.
        if len(el) <= 2:
            return False
        usable_length = sum(1 for c in el if c in string.ascii_letters)
        return len(el) * 3 / 4 < usable_length

    try:
        wikilist = [el for el in llm_list[1:-1].split('"') if filter_(el)]
    except (TypeError, AttributeError):
        # Raised when llm_list is not string-like (None, number, list, bytes).
        wikilist = []
        print('issues with the wikilist')
    else:
        print(wikilist)
    return wikilist
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_public_paragraph(task: {}) -> str:
    """Generate the paragraph described by ``task`` from the LLM alone.

    The prompt is assembled from the ``'description'``, ``'doc_description'``,
    ``'above'``, ``'before'`` and ``'after'`` entries of ``task`` and sent to
    ``openai_llm``; whatever ``invoke`` returns is handed back unchanged.
    """
    print(task)

    # Pull the fields out first, in the order the prompt uses them.
    description = task['description']
    doc_description = task['doc_description']
    above = task['above']
    before = task['before']
    after = task['after']

    prompt_parts = (
        "\n",
        " Your task consists in generating a paragraph\\n",
        f" whose description is delimited by triple backticks: ```{description}```\n",
        "\n",
        " The paragraph belongs at the top level of the hierarchy to a document \\n",
        f" whose description is delimited by triple backticks: ``` {doc_description}```\n",
        " Make sure that the paragraph relates the top level of the document\n",
        " \n",
        " The paragraph belongs to a higher paragraph in the hierarchy \\n",
        f" whose description is delimited by triple backticks: ``` {above}```\n",
        " Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n",
        " \n",
        " The paragraphs comes after previous paragraphs \\n",
        f" whose description is delimited by triple backticks: ``` {before}```\n",
        " Make sure that the paragraph relates with previous paragraph without any repetition\n",
        " \n",
        " The paragraphs comes before next paragraphs \\n",
        f" whose description is delimited by triple backticks: ``` {after}```\n",
        " Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n",
        " \n",
        " \n",
        "\n",
        " ",
    )
    prompt = "".join(prompt_parts)

    return openai_llm.invoke(prompt)
|
|
|
|
|
|
|
|
def create_index(wikilist: [str]):
    """Build a Chroma index from the text of fetched wiki pages.

    Each title is fetched through ``Wiki().fetch``; results that are plain
    strings are skipped (presumably error messages; confirm against
    ``Wiki.fetch``). The page text is cut into paragraphs, split further on
    ``"."`` and handed to ``Chroma.from_texts``.

    Args:
        wikilist: titles of the wiki pages to fetch.

    Returns:
        The object returned by ``Chroma.from_texts``.

    Raises:
        IndexError: if no title produced a usable page.
        AssertionError: if a split chunk is empty, not a ``str``, or
            1000 characters or longer.
    """
    fetch = Wiki().fetch

    # Fetch every title exactly once; the previous comprehension called
    # fetch() twice per title (once to test the result, once to keep it).
    pages = []
    for title in wikilist:
        page = fetch(title)
        if not isinstance(page, str):
            pages.append((title, page))

    chunk = 800
    texts = [WikiPage(title=title, fulltext=page.page_content) for title, page in pages]

    doc_splitter = CharacterTextSplitter(
        separator=".",
        chunk_size=chunk,
        chunk_overlap=100,
        length_function=len,
    )

    # NOTE(review): only the first fetched page is indexed; the remaining
    # entries of ``texts`` are built but never used. Confirm whether every
    # page was meant to be included.
    paragraphs = texts[0].get_paragraphs(chunk=chunk)

    split_texts = []
    for p in paragraphs:
        split_texts += doc_splitter.split_text(p)

    # Sanity checks on the splitter output before indexing.
    for split_text in split_texts:
        assert isinstance(split_text, str)
        assert 0 < len(split_text) < 2 * 500

    wiki_index = Chroma.from_texts(split_texts)

    return wiki_index
|
|
|
|
|
|
|
|
def get_wiki_paragraph(wiki_index, task: {}) -> str:
    """Generate the paragraph described by ``task`` using passages from ``wiki_index``.

    A first draft is produced with ``get_public_paragraph(task)`` and used as
    the query for ``semantic_search(wiki_index, ...)``. The ``page_content``
    of every hit is concatenated and embedded in the final prompt together
    with the ``'description'``, ``'doc_description'``, ``'above'``,
    ``'before'`` and ``'after'`` entries of ``task``.

    Args:
        wiki_index: index object accepted by ``semantic_search``.
        task: mapping holding the prompt fields listed above.

    Returns:
        Whatever ``openai_llm.invoke`` returns for the assembled prompt.
    """
    task_description = get_public_paragraph(task)
    wiki_paragraphs = semantic_search(wiki_index, task_description)

    # Separate passages with a blank line. The previous "/n/n" was a typo
    # that injected literal slashes into the prompt.
    text_content = "".join(p.page_content + "\n\n" for p in wiki_paragraphs)

    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The text generation is based in the documents provided in these sections \n"
                f" delimited by by triple backticks: ``` {text_content}``` \n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    llm = openai_llm
    # Use .invoke like every other helper in this module instead of calling
    # the LLM object directly.
    p = llm.invoke(template)

    return p
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_private_paragraph(texts, task: {}) -> str:
    """Generate the paragraph described by ``task`` from caller-supplied texts.

    The strings in ``texts`` are concatenated and embedded in the prompt
    together with the ``'description'``, ``'doc_description'``, ``'above'``,
    ``'before'`` and ``'after'`` entries of ``task``.

    Args:
        texts: iterable of strings used as source material.
        task: mapping holding the prompt fields listed above.

    Returns:
        Whatever ``openai_llm.invoke`` returns for the assembled prompt.
    """
    # Separate source texts with a blank line. The previous "/n/n" was a typo
    # that injected literal slashes into the prompt.
    text_content = "".join(t + "\n\n" for t in texts)

    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The text generation is based in the documents provided in these sections \n"
                f" delimited by by triple backticks: ``` {text_content}``` \n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    llm = openai_llm
    p = llm.invoke(template)

    return p
|
|
|
|
|
def summarize_paragraph_v2(prompt : str, title_doc : str = '', title_para : str = ''):
    """Summarize a paragraph in English with the OpenAI chat completions API.

    Args:
        prompt: paragraph to summarize; the text before the first ``" :"`` is
            used as the paragraph's location inside the document.
        title_doc: title of the document, inserted in the system message.
        title_para: title of the paragraph, inserted in the system message.

    Returns:
        The stripped summary text, or ``""`` when the completion carries no
        content.
    """
    # Only stated in the prompt as a target length; not passed to the API.
    max_tokens = 850
    location_of_the_paragraph = prompt.split(" :")[0]
    task = (f"Your task consists in summarizing in English the paragraph of the document untitled ```{title_doc}``` located in the ```{location_of_the_paragraph}``` section of the document."
            f"The paragraph title is ```{title_para}```."
            f"Your response shall be concise and shall respect the following format:"
            f"<summary>"
            f"If you see that the summary that you are creating will not respect ```{max_tokens}``` tokens, find a way to make it shorter.")
    generation = openai.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        messages=[
            {"role": "system", "content": task},
            {"role": "user", "content": prompt},
        ],
    )
    res = generation.choices[0].message.content
    print("****************")
    print(res)
    print("----")
    # message.content may be None; str(None) would return the text "None"
    # as if it were a summary.
    return (res or "").strip()
|
|
|
|
|
def generate_response_to_exigence(exigence : str, titre_exigence : str, content : str):
    """Ask the LLM to answer a tender requirement using only ``content``.

    The prompt states the requirement (``exigence``), its subject
    (``titre_exigence``) and the reference text (``content``), then is sent to
    ``openai_llm``; the result of ``invoke`` is returned unchanged.
    """
    prompt_parts = [
        "Your task consists in generating a response to a requirement in a tender for Orange, a telecommunication operator.",
        f"The requirement dealing with {titre_exigence} is expressed below between triple backquotes:",
        f"```{exigence}```",
        f"Your answer should be precise, consistent and as concise as possible with no politeness formulas and strictly be based on the following text delimited by triple backquotes : ```{content}```",
    ]
    prompt = "".join(prompt_parts)
    return openai_llm.invoke(prompt)
|
|
|