Spaces:
Runtime error
Runtime error
adrien.aribaut-gaudin
committed on
Commit
·
d24ee64
1
Parent(s):
6aa774c
Added the possibility to answer in French
Browse files- src/control/control.py +4 -3
- src/model/container.py +15 -1
- src/tools/llm.py +16 -7
src/control/control.py
CHANGED
|
@@ -12,13 +12,14 @@ class Chatbot:
|
|
| 12 |
|
| 13 |
def get_response(self, query, histo):
|
| 14 |
histo_conversation, histo_queries = self._get_histo(histo)
|
| 15 |
-
|
|
|
|
| 16 |
block_sources = self.retriever.similarity_search(query=queries)
|
| 17 |
block_sources = self._select_best_sources(block_sources)
|
| 18 |
sources_contents = [s.content for s in block_sources]
|
| 19 |
context = '\n'.join(sources_contents)
|
| 20 |
-
answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation, context=context, language=
|
| 21 |
-
answer = self.llm.generate_answer(
|
| 22 |
answer = self._clean_answer(answer)
|
| 23 |
return answer, block_sources
|
| 24 |
|
|
|
|
| 12 |
|
| 13 |
def get_response(self, query, histo):
    """Answer `query` given the conversation history `histo`.

    Detects the query's language; if it is French, the condensed history
    queries are translated before retrieval (the indexed corpus is
    presumably not French — TODO confirm). A draft paragraph is generated
    and then refined, both in the detected language.

    :param query: the user's current question.
    :param histo: prior conversation turns, condensed by ``_get_histo``.
    :return: tuple ``(answer, block_sources)`` — the cleaned answer string
             and the source blocks used as context.
    """
    histo_conversation, histo_queries = self._get_histo(histo)
    # .strip() guards against the LLM padding the language code with whitespace.
    language_of_query = self.llm.detect_language(query).strip()
    queries = self.llm.translate(text=histo_queries) if language_of_query.lower() == 'fr' else histo_queries
    block_sources = self.retriever.similarity_search(query=queries)
    block_sources = self._select_best_sources(block_sources)
    sources_contents = [s.content for s in block_sources]
    context = '\n'.join(sources_contents)
    # Two-pass generation: draft paragraph first, then a refinement pass
    # that sees the original query and history.
    answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation,
                                         context=context, language=language_of_query)
    answer = self.llm.generate_answer(answer=answer, query=query, histo=histo_conversation,
                                      context=context, language=language_of_query)
    answer = self._clean_answer(answer)
    return answer, block_sources
|
| 25 |
|
src/model/container.py
CHANGED
|
@@ -77,7 +77,21 @@ class Container:
|
|
| 77 |
|
| 78 |
return attached_paragraphs, children
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
# def create_children(self, paragraphs: [Paragraph], level: int, index: [int]):
|
| 83 |
# """
|
|
|
|
| 77 |
|
| 78 |
return attached_paragraphs, children
|
| 79 |
|
| 80 |
+
@property
def text_chunks(self, chunk=500):
    """Recursively pack paragraph texts into chunks of roughly `chunk` characters.

    Greedy packing: paragraph texts are appended to the current chunk until
    adding the next one would exceed `chunk`; that paragraph then starts a
    new chunk. (The original implementation silently DROPPED the overflowing
    paragraph, and could append an empty chunk when the very first paragraph
    exceeded the budget — both fixed here.) Chunks from child containers are
    appended after this container's own chunks.

    NOTE: because this is a property, callers can never pass `chunk`; the
    default of 500 is always used.

    :return: list of non-empty text chunks.
    """
    chunks = []
    current = ""
    for p in self.paragraphs:
        if chunk < len(current) + len(p.text):
            if current and not current.isspace():
                chunks.append(current)
            # Start the next chunk with the overflowing paragraph instead of
            # discarding it.
            current = p.text
        else:
            current += " " + p.text
    if current and not current.isspace():
        chunks.append(current)
    for child in self.children:
        chunks += child.text_chunks
    return chunks
|
| 95 |
|
| 96 |
# def create_children(self, paragraphs: [Paragraph], level: int, index: [int]):
|
| 97 |
# """
|
src/tools/llm.py
CHANGED
|
@@ -34,23 +34,32 @@ class LlmAgent:
|
|
| 34 |
p = self.llm(template)
|
| 35 |
return p
|
| 36 |
|
| 37 |
-
def generate_answer(self, query: str,
|
| 38 |
-
"""provides the final answer
|
| 39 |
|
| 40 |
def _cut_unfinished_sentence(s: str):
|
| 41 |
return '.'.join(s.split('.')[:-1])
|
| 42 |
|
| 43 |
-
template = (f"Your task consists in
|
| 44 |
f"delimited by triple backticks: ```{query}``` \\n"
|
| 45 |
-
f"You are given the answer in
|
| 46 |
-
f"\\n You don't add new content to the answer
|
| 47 |
-
f"\\n 1 You can use some vocabulary from the context
|
| 48 |
f"```{context}```"
|
| 49 |
f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
|
| 50 |
-
f" conversation
|
| 51 |
)
|
| 52 |
|
| 53 |
p = self.llm(template)
|
| 54 |
# p = _cut_unfinished_sentence(p)
|
| 55 |
return p
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
|
|
|
| 34 |
p = self.llm(template)
|
| 35 |
return p
|
| 36 |
|
| 37 |
+
def generate_answer(self, query: str, answer: str, histo: str, context: str, language: str) -> str:
    """Refine a draft `answer` to `query`, keeping it in `language`.

    Prompts the LLM to restate the draft without adding new content, reusing
    vocabulary from `context` and staying consistent with the conversation
    `histo`. Returns the LLM output unchanged.

    :param query: the user's original question.
    :param answer: the draft answer to refine.
    :param histo: the conversation so far.
    :param context: retrieved source text the answer may draw vocabulary from.
    :param language: two-letter language code the answer must be written in.
    :return: the refined answer produced by the LLM.
    """
    # Removed dead code: an unused nested _cut_unfinished_sentence helper
    # whose only call site was commented out.
    template = (f"Your task consists in answering to the query from users in {language} "
                f"delimited by triple backticks: ```{query}``` \\n"
                f"You are given the answer in {language} delimited by triple backticks: ```{answer}```"
                f"\\n You don't add new content to the answer but: "
                f"\\n 1 You can use some vocabulary from the context delimited by triple backticks: "
                f"```{context}```"
                f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
                f" conversation delimited by triple backticks: ```{histo}```"
                )
    return self.llm(template)
|
| 56 |
+
|
| 57 |
+
def detect_language(self, text: str) -> str:
    """Ask the LLM which language `text` is written in.

    :param text: the text whose language should be identified.
    :return: the raw LLM reply, expected to be a two-letter language code.
    """
    prompt = (
        "Your task consists in detecting the language of the following text "
        f"delimited by triple backticks: ```{text}```"
        "Your answer shall be the two letters code of the language"
    )
    return self.llm(prompt)
|
| 65 |
|