Spaces: Sleeping
Update app.py
Browse files

app.py CHANGED
@@ -30,25 +30,31 @@ embed_model = HuggingFaceBgeEmbeddings(
     encode_kwargs={'normalize_embeddings': True}
 )
 
-model_name = "google/gemma-2-2b-it"#"prithivMLmods/Llama-3.2-3B-GGUF"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    trust_remote_code=True,
-    use_auth_token=True
-)
+model_name = "meta-llama/Llama-3.2-3B-Instruct"#"google/gemma-2-2b-it"#"prithivMLmods/Llama-3.2-3B-GGUF"
+from huggingface_hub import InferenceClient
 
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=2048*2,
-    temperature=0.3,
-    top_p=0.95,
-    generation_config=model.generation_config
-    # repetition_penalty=1.15
-)
-llm = HuggingFacePipeline(pipeline=pipe)
+client = InferenceClient(model_name)
+
+
+
+# tokenizer = AutoTokenizer.from_pretrained(model_name)
+# model = AutoModelForCausalLM.from_pretrained(
+#     model_name,
+#     trust_remote_code=True,
+#     use_auth_token=True
+# )
+
+# pipe = pipeline(
+#     "text-generation",
+#     model=model,
+#     tokenizer=tokenizer,
+#     max_new_tokens=2048*2,
+#     temperature=0.3,
+#     top_p=0.95,
+#     generation_config=model.generation_config
+#     # repetition_penalty=1.15
+# )
+# llm = HuggingFacePipeline(pipeline=pipe)
 # model.generation_config.pad_token_id = model.generation_config.eos_token_id
 
 
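For reference, a minimal sketch of exercising the Inference API client set up above on its own, assuming a huggingface_hub version recent enough to expose the OpenAI-style client.chat.completions.create call used in this commit. The HF_TOKEN environment variable, the prompt text, and the non-streaming call are illustrative assumptions; app.py itself calls the client with stream=True inside query().

```python
import os
from huggingface_hub import InferenceClient

# Assumption: a gated model such as meta-llama/Llama-3.2-3B-Instruct needs an access token.
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct", token=os.getenv("HF_TOKEN"))

# One-shot (non-streaming) chat completion against the serverless Inference API.
response = client.chat.completions.create(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
)
print(response.choices[0].message.content)
```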
@@ -68,7 +74,7 @@ class RAGConfig:
     chunk_size: int = 500
     chunk_overlap: int = 100
     retriever_k: int = 3
-    persist_directory: str = "./chroma_db"
+    # persist_directory: str = "./chroma_db"
 
 class AdvancedRAGSystem:
     """Advanced RAG System with improved error handling and type safety"""
@@ -96,11 +102,12 @@ Context:
         self.config = config or RAGConfig()
         self.vector_store: Optional[Chroma] = None
         self.last_context: Optional[str] = None
-
-        self.prompt = PromptTemplate(
-            template=self.DEFAULT_TEMPLATE,
-            input_variables=["context", "question"]
-        )
+        self.context = None
+        self.source_documents = 0
+        # self.prompt = PromptTemplate(
+        #     template=self.DEFAULT_TEMPLATE,
+        #     input_variables=["context", "question"]
+        # )
 
     def _validate_file(self, file_path: Path) -> bool:
         """Validate if the file is of supported format and exists"""
@@ -184,20 +191,41 @@ Context:
             retrieved_docs = retriever.get_relevant_documents(question)
             context = self._format_context(retrieved_docs)
             self.last_context = context
+            messages = [
+                {
+                    "role":"system",
+                    "content":f"""<|start_header_id|>system<|end_header_id|>
+You are a helpful assistant. Use the following pieces of context to answer the question at the end.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
 
-            # Generate response using LLM
-            response = self.llm.invoke(
-                self.prompt.format(
-                    context=context,
-                    question=question
-                )
-            )
+Context:
+{context}
 
-            return {
-                "answer": response,
-                "context": context,
-                "source_documents": len(retrieved_docs)
-            }
+<|eot_id|><|start_header_id|>user<|end_header_id|>
+{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+"""
+                },
+                {
+                    "role": "user",
+                    "content": "What is the capital of France?"
+                }
+            ]
+            self.context = context
+            self.source_documents = len(retrieved_docs)
+            # Generate response using LLM ###########
+            # response = self.llm.invoke(
+            #     self.prompt.format(
+            #         context=context,
+            #         question=question
+            #     )
+            # )
+
+            return client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                max_tokens=500,
+                stream=True
+            )
 
         except Exception as e:
             error_msg = f"Error during query processing: {str(e)}"
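With stream=True, query() now returns the stream object itself, so callers receive an iterator of chunks rather than a finished answer, and read choices[0].delta.content from each chunk. A sketch of consuming it outside Gradio; rag_system (an initialized AdvancedRAGSystem with documents already indexed) and the question string are assumptions for illustration.

```python
# Illustrative only: rag_system and the question are placeholders.
parts = []
for chunk in rag_system.query("What does the uploaded PDF say about chunk size?"):
    delta = chunk.choices[0].delta.content
    if delta:  # the final chunk of a stream can carry an empty delta
        parts.append(delta)

print("".join(parts))
```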
@@ -221,16 +249,17 @@ def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
         except Exception as e:
             return f"Error: {str(e)}"
 
-    def
+    def query_fin(question):
         """Query system and update history with error handling"""
         try:
-            result = rag_system.query(question)
-            return (
-                result["answer"],
-                f"Last context used ({result['source_documents']} documents):\n\n{result['context']}"
-            )
+            for x in rag_system.query(question):
+                yield x.choices[0].delta.content
         except Exception as e:
-
+            pass
+
+    def update_history(question: str):
+        return f"Last context used ({self.source_documents} documents):\n\n{self.context}"
+
     with gr.Blocks(title="Advanced RAG System") as demo:
         gr.Markdown("# Advanced RAG System with PDF Processing")
 
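Two notes on the helpers added here, illustrated with a variant sketch rather than the committed code. Each value a Gradio generator yields replaces the Textbox contents, so yielding only chunk.choices[0].delta.content displays just the newest token; accumulating the text keeps the whole answer visible. Also, update_history refers to self, which is not defined inside create_gradio_interface; the attributes it needs live on the rag_system argument.

```python
def query_fin(question):
    """Stream the answer, accumulating deltas so the full text stays on screen."""
    answer = ""
    try:
        for chunk in rag_system.query(question):
            delta = chunk.choices[0].delta.content
            if delta:
                answer += delta
                yield answer  # each yield replaces the Textbox value
    except Exception as e:
        yield f"Error: {str(e)}"

def update_history(question: str):
    """Report the retrieval context captured by the last query() call."""
    return f"Last context used ({rag_system.source_documents} documents):\n\n{rag_system.context}"
```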
@@ -286,9 +315,15 @@ def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
         )
 
         query_button.click(
-            fn=
+            fn=query_fin,
             inputs=[question_input],
-            outputs=[answer_output, history_output]
+            outputs=[answer_output]
+        )
+
+        query_button.click(
+            fn=update_history,
+            inputs=[],
+            outputs=[history_output]
         )
 
     return demo
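The two click handlers registered above run independently, so the context panel may refresh while the answer is still streaming. If it should update only after the stream completes, Gradio allows chaining the second callback onto the first; a sketch with the same component names (question_input is forwarded because update_history takes one argument, whereas the committed wiring passes inputs=[]):

```python
query_button.click(
    fn=query_fin,
    inputs=[question_input],
    outputs=[answer_output],
).then(  # runs only after the streaming handler has finished
    fn=update_history,
    inputs=[question_input],
    outputs=[history_output],
)
```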