Spaces:
Runtime error
Runtime error
adrien.aribaut-gaudin
committed on
Commit
·
d24ee64
1
Parent(s):
6aa774c
Added the possibility to answer in French
Browse files- src/control/control.py +4 -3
- src/model/container.py +15 -1
- src/tools/llm.py +16 -7
src/control/control.py
CHANGED
|
@@ -12,13 +12,14 @@ class Chatbot:
|
|
| 12 |
|
| 13 |
def get_response(self, query, histo):
|
| 14 |
histo_conversation, histo_queries = self._get_histo(histo)
|
| 15 |
-
|
|
|
|
| 16 |
block_sources = self.retriever.similarity_search(query=queries)
|
| 17 |
block_sources = self._select_best_sources(block_sources)
|
| 18 |
sources_contents = [s.content for s in block_sources]
|
| 19 |
context = '\n'.join(sources_contents)
|
| 20 |
-
answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation, context=context, language=
|
| 21 |
-
answer = self.llm.generate_answer(
|
| 22 |
answer = self._clean_answer(answer)
|
| 23 |
return answer, block_sources
|
| 24 |
|
|
|
|
| 12 |
|
| 13 |
def get_response(self, query, histo):
    """Answer `query` given the conversation history `histo`.

    Detects the query's language; if it is French, the condensed history
    queries are translated before retrieval (the indexed corpus is
    presumably not French — TODO confirm). A draft paragraph is generated
    and then refined, both in the detected language.

    :param query: the user's current question.
    :param histo: prior conversation turns, condensed by ``_get_histo``.
    :return: tuple ``(answer, block_sources)`` — the cleaned answer string
             and the source blocks used as context.
    """
    histo_conversation, histo_queries = self._get_histo(histo)
    # .strip() guards against the LLM padding the language code with whitespace.
    language_of_query = self.llm.detect_language(query).strip()
    queries = self.llm.translate(text=histo_queries) if language_of_query.lower() == 'fr' else histo_queries
    block_sources = self.retriever.similarity_search(query=queries)
    block_sources = self._select_best_sources(block_sources)
    sources_contents = [s.content for s in block_sources]
    context = '\n'.join(sources_contents)
    # Two-pass generation: draft paragraph first, then a refinement pass
    # that sees the original query and history.
    answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation,
                                         context=context, language=language_of_query)
    answer = self.llm.generate_answer(answer=answer, query=query, histo=histo_conversation,
                                      context=context, language=language_of_query)
    answer = self._clean_answer(answer)
    return answer, block_sources
|
| 25 |
|
src/model/container.py
CHANGED
|
@@ -77,7 +77,21 @@ class Container:
|
|
| 77 |
|
| 78 |
return attached_paragraphs, children
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
# def create_children(self, paragraphs: [Paragraph], level: int, index: [int]):
|
| 83 |
# """
|
|
|
|
| 77 |
|
| 78 |
return attached_paragraphs, children
|
| 79 |
|
| 80 |
+
@property
def text_chunks(self, chunk=500):
    """Recursively pack paragraph texts into chunks of roughly `chunk` characters.

    Greedy packing: paragraph texts are appended to the current chunk until
    adding the next one would exceed `chunk`; that paragraph then starts a
    new chunk. (The original implementation silently DROPPED the overflowing
    paragraph, and could append an empty chunk when the very first paragraph
    exceeded the budget — both fixed here.) Chunks from child containers are
    appended after this container's own chunks.

    NOTE: because this is a property, callers can never pass `chunk`; the
    default of 500 is always used.

    :return: list of non-empty text chunks.
    """
    chunks = []
    current = ""
    for p in self.paragraphs:
        if chunk < len(current) + len(p.text):
            if current and not current.isspace():
                chunks.append(current)
            # Start the next chunk with the overflowing paragraph instead of
            # discarding it.
            current = p.text
        else:
            current += " " + p.text
    if current and not current.isspace():
        chunks.append(current)
    for child in self.children:
        chunks += child.text_chunks
    return chunks
|
| 95 |
|
| 96 |
# def create_children(self, paragraphs: [Paragraph], level: int, index: [int]):
|
| 97 |
# """
|
src/tools/llm.py
CHANGED
|
@@ -34,23 +34,32 @@ class LlmAgent:
|
|
| 34 |
p = self.llm(template)
|
| 35 |
return p
|
| 36 |
|
| 37 |
-
def generate_answer(self, query: str,
|
| 38 |
-
"""provides the final answer
|
| 39 |
|
| 40 |
def _cut_unfinished_sentence(s: str):
|
| 41 |
return '.'.join(s.split('.')[:-1])
|
| 42 |
|
| 43 |
-
template = (f"Your task consists in
|
| 44 |
f"delimited by triple backticks: ```{query}``` \\n"
|
| 45 |
-
f"You are given the answer in
|
| 46 |
-
f"\\n You don't add new content to the answer
|
| 47 |
-
f"\\n 1 You can use some vocabulary from the context
|
| 48 |
f"```{context}```"
|
| 49 |
f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
|
| 50 |
-
f" conversation
|
| 51 |
)
|
| 52 |
|
| 53 |
p = self.llm(template)
|
| 54 |
# p = _cut_unfinished_sentence(p)
|
| 55 |
return p
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
|
|
|
| 34 |
p = self.llm(template)
|
| 35 |
return p
|
| 36 |
|
| 37 |
+
def generate_answer(self, query: str, answer: str, histo: str, context: str, language: str) -> str:
    """Refine a draft `answer` to `query`, keeping it in `language`.

    Prompts the LLM to restate the draft without adding new content, reusing
    vocabulary from `context` and staying consistent with the conversation
    `histo`. Returns the LLM output unchanged.

    :param query: the user's original question.
    :param answer: the draft answer to refine.
    :param histo: the conversation so far.
    :param context: retrieved source text the answer may draw vocabulary from.
    :param language: two-letter language code the answer must be written in.
    :return: the refined answer produced by the LLM.
    """
    # Removed dead code: an unused nested _cut_unfinished_sentence helper
    # whose only call site was commented out.
    template = (f"Your task consists in answering to the query from users in {language} "
                f"delimited by triple backticks: ```{query}``` \\n"
                f"You are given the answer in {language} delimited by triple backticks: ```{answer}```"
                f"\\n You don't add new content to the answer but: "
                f"\\n 1 You can use some vocabulary from the context delimited by triple backticks: "
                f"```{context}```"
                f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
                f" conversation delimited by triple backticks: ```{histo}```"
                )
    return self.llm(template)
|
| 56 |
+
|
| 57 |
+
def detect_language(self, text: str) -> str:
    """Ask the LLM which language `text` is written in.

    :param text: the text whose language should be identified.
    :return: the raw LLM reply, expected to be a two-letter language code.
    """
    prompt = (
        "Your task consists in detecting the language of the following text "
        f"delimited by triple backticks: ```{text}```"
        "Your answer shall be the two letters code of the language"
    )
    return self.llm(prompt)
|
| 65 |
|