Update app.py

Refactor the standard RAG prompt into a dedicated run_standard_rag method, add a run_analytical_rag strategy, and have query_and_generate_response combine the standard, agentic, and analytical answers into a single response.

app.py CHANGED
@@ -104,6 +104,24 @@ class DocumentRetrievalAndGeneration:
 
         return RetrieverTool(self)
 
+    def run_standard_rag(self, query: str, content: str) -> str:
+        conversation = [
+            {"role": "system", "content": "You are a knowledgeable assistant with access to a comprehensive database."},
+            {"role": "user", "content": f"""
+            I need you to answer my question and provide related information in a specific format.
+            I have provided five relevant JSON files {content}; choose the most suitable chunks for answering the query.
+            RETURN ONLY THE SOLUTION, without additional comments, sign-offs, retrieved chunks, references to any ticket, or extra phrases. Be direct and to the point.
+            IF THERE IS NO RELEVANT ANSWER IN THE RETRIEVED CHUNKS, RETURN "NO SOLUTION AVAILABLE".
+            DO NOT GIVE REFERENCES TO ANY CHUNKS OR TICKETS; BE ON POINT.
+
+            Here's my question:
+            Query: {query}
+            Solution==>
+            """}
+        ]
+        input_ids = self.tokenizer.apply_chat_template(conversation, return_tensors="pt").to(self.model.device)
+        return self.generate_response_with_timeout(input_ids)
+
     def run_agentic_rag(self, question: str) -> str:
         retriever_output = self.retriever_tool.run(question)
 
@@ -119,11 +137,26 @@ Question: {question}
         Answer:"""
 
         input_ids = self.tokenizer.encode(enhanced_prompt, return_tensors="pt").to(self.model.device)
+        return self.generate_response_with_timeout(input_ids)
+
+    def run_analytical_rag(self, question: str) -> str:
+        retriever_output = self.retriever_tool.run(question)
 
+        enhanced_prompt = f"""Using the following information retrieved from the knowledge base:
+
+        {retriever_output}
+
+        Provide a detailed, step-by-step analysis of the question below. Break down the problem, consider different aspects, and provide a thorough explanation. If relevant information is missing, state what additional data would be needed for a complete analysis.
+
+        Question: {question}
+        Analysis:
+        1. """
+
+        input_ids = self.tokenizer.encode(enhanced_prompt, return_tensors="pt").to(self.model.device)
         return self.generate_response_with_timeout(input_ids)
 
     def query_and_generate_response(self, query):
-        #
+        # Retrieval step
         similarityThreshold = 1
         query_embedding = self.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
         distances, indices = self.gpu_index.search(np.array([query_embedding]), k=3)
@@ -143,26 +176,10 @@ Answer:"""
|
|
| 143 |
print(self.all_splits[idx].page_content)
|
| 144 |
print("############################")
|
| 145 |
|
| 146 |
-
|
| 147 |
-
{"role": "system", "content": "You are a knowledgeable assistant with access to a comprehensive database."},
|
| 148 |
-
{"role": "user", "content": f"""
|
| 149 |
-
I need you to answer my question and provide related information in a specific format.
|
| 150 |
-
I have provided five relatable json files {content}, choose the most suitable chunks for answering the query.
|
| 151 |
-
RETURN ONLY SOLUTION without additional comments, sign-offs, retrived chunks, refrence to any Ticket or extra phrases. Be direct and to the point.
|
| 152 |
-
IF THERE IS NO ANSWER RELATABLE IN RETRIEVED CHUNKS, RETURN "NO SOLUTION AVAILABLE".
|
| 153 |
-
DO NOT GIVE REFRENCE TO ANY CHUNKS OR TICKETS,BE ON POINT.
|
| 154 |
-
|
| 155 |
-
Here's my question:
|
| 156 |
-
Query: {query}
|
| 157 |
-
Solution==>
|
| 158 |
-
"""}
|
| 159 |
-
]
|
| 160 |
-
input_ids = self.tokenizer.apply_chat_template(conversation, return_tensors="pt").to(self.model.device)
|
| 161 |
-
|
| 162 |
start_time = datetime.now()
|
| 163 |
-
standard_response = self.
|
| 164 |
elapsed_time = datetime.now() - start_time
|
| 165 |
-
|
| 166 |
print("Generated standard response:", standard_response)
|
| 167 |
print("Time elapsed:", elapsed_time)
|
| 168 |
print("Device in use:", self.model.device)
|
|
@@ -170,15 +187,16 @@ Answer:"""
|
|
| 170 |
standard_solution_text = standard_response.strip()
|
| 171 |
if "Solution:" in standard_solution_text:
|
| 172 |
standard_solution_text = standard_solution_text.split("Solution:", 1)[1].strip()
|
| 173 |
-
|
| 174 |
-
# Post-processing to remove "assistant" prefix
|
| 175 |
standard_solution_text = re.sub(r'^assistant\s*', '', standard_solution_text, flags=re.IGNORECASE)
|
| 176 |
standard_solution_text = standard_solution_text.strip()
|
| 177 |
|
| 178 |
# Agentic RAG
|
| 179 |
agentic_solution_text = self.run_agentic_rag(query)
|
| 180 |
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
| 182 |
return combined_solution, content
|
| 183 |
|
| 184 |
def qa_infer_gradio(self, query):
|
|
@@ -220,7 +238,7 @@ if __name__ == "__main__":
|
|
| 220 |
examples=EXAMPLES,
|
| 221 |
cache_examples=False,
|
| 222 |
outputs=[gr.Textbox(label="RESPONSE"), gr.Textbox(label="RELATED QUERIES")],
|
| 223 |
-
css=
|
| 224 |
title="TI E2E FORUM"
|
| 225 |
)
|
| 226 |
|
|
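For reference, a minimal usage sketch of the class after this change. This is an assumption-laden illustration, not part of the commit: the constructor call and the example query are hypothetical stand-ins (the real __init__ arguments are defined elsewhere in app.py and not shown in this diff); only the class and method names come from the code above.

    # Hypothetical usage sketch -- only the class/method names are taken from
    # this diff; the constructor arguments and the query text are invented.
    from app import DocumentRetrievalAndGeneration

    # Assumption: the constructor loads the model, tokenizer, embeddings,
    # and FAISS index with its defaults.
    system = DocumentRetrievalAndGeneration()

    query = "How do I configure the ADC sampling rate?"  # placeholder question

    # query_and_generate_response runs the FAISS retrieval step, then the
    # standard RAG path (run_standard_rag, over the retrieved chunks) and the
    # agentic and analytical paths (which re-retrieve via the RetrieverTool),
    # returning the three labeled answers plus the retrieved context.
    combined_solution, retrieved_content = system.query_and_generate_response(query)
    print(combined_solution)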