Spaces:

OnurKerimoglu
/

rag_chat

Sleeping

App Files Files Community

OnurKerimoglu committed on May 26, 2025

Commit

4305c1f

1 Parent(s): bc99a02

src.rag: fixed the interface for using HF-inference models; default llm model: zephyr-7b-beta

Browse files

Files changed (1) hide show

src/rag.py +22 -19

src/rag.py CHANGED Viewed

@@ -23,8 +23,8 @@ class RAG():
         # Constants
         # self.use_model = 'gpt-4o-mini'
-        # self.use_model = 'zephyr-7b-alpha'
-        self.use_model = 'Mistral-Nemo-Base-2407'
         # self.use_vectordb = 'chroma'
         self.use_vectordb = 'faiss'
@@ -108,10 +108,10 @@ class RAG():
         """
         Instantiates a language model based on the specified model type.
-        This function supports two models:
         - 'gpt-4o-mini' through the ChatOpenAI interface
-        - 'Mistral-Nemo-Base-2407' through the HuggingFaceEndpoint, with provider: novita
-        ('zephyr-7b-alpha' through the HuggingFaceEndpoint is being tested, but not working at the moment)
         The model is determined by the `self.use_model` attribute.
         Returns an instance of the selected language model.
@@ -124,19 +124,21 @@ class RAG():
             llm = ChatOpenAI(
                 model_name="gpt-4o-mini",
                 temperature=0)
-        # elif self.use_model ==  'zephyr-7b-alpha':
-        #     print(f'As llm, using HF-Endpint: {self.use_model}')
-        #     llm = HuggingFaceEndpoint(
-        #         repo_id=f"HuggingFaceH4/{self.use_model}",
-        #         temperature=0.1,
-        #         max_new_tokens=512,
-        #         do_sample=False
-        #         )
         elif self.use_model ==  'Mistral-Nemo-Base-2407':
             provider = "novita"
             print(f'As llm, using HF-Endpint: {self.use_model} through provider: {provider}')
             llm = HuggingFaceEndpoint(
-                repo_id="mistralai/Mistral-Nemo-Base-2407",
                 provider=provider,
                 temperature=0.1,
                 max_new_tokens=512,
@@ -229,13 +231,14 @@ class RAG():
 if __name__ == "__main__":
     rag = RAG(
-        urls = [
-            "https://en.wikipedia.org/wiki/Artificial_intelligence",
-            "https://en.wikipedia.org/wiki/Machine_learning"
-        ]
         # pdfs = ["/home/onur/WORK/DS/repos/chat_with_docs/docs/the-big-book-of-mlops-v10-072023 - Databricks.pdf"]
     )
-    response = rag.ask_QAbot("What is Machine Learning?")
     print(f"Question: {response['question']}")
     print(f"Answer: {response['answer']}")
     print("Sources:")

         # Constants
         # self.use_model = 'gpt-4o-mini'
+        self.use_model = 'zephyr-7b-beta'
+        # self.use_model = 'Mistral-Nemo-Base-2407'
         # self.use_vectordb = 'chroma'
         self.use_vectordb = 'faiss'
         """
         Instantiates a language model based on the specified model type.
+        This function supports the following models:
         - 'gpt-4o-mini' through the ChatOpenAI interface
+        - 'zephyr-7b-beta' through the HuggingFaceEndpoint with provider: hf-inference
+        - 'Mistral-Nemo-Base-2407' through the HuggingFaceEndpoint, with provider: novita (at testing stage)
         The model is determined by the `self.use_model` attribute.
         Returns an instance of the selected language model.
             llm = ChatOpenAI(
                 model_name="gpt-4o-mini",
                 temperature=0)
+        elif self.use_model ==  'zephyr-7b-beta':
+            provider = "hf-inference"
+            print(f'As llm, using HF-Endpint: {self.use_model} through provider: {provider}')
+            llm = HuggingFaceEndpoint(
+                repo_id=f"HuggingFaceH4/{self.use_model}",
+                provider=provider,
+                temperature=0.1,
+                max_new_tokens=512,
+                do_sample=False
+            )
         elif self.use_model ==  'Mistral-Nemo-Base-2407':
             provider = "novita"
             print(f'As llm, using HF-Endpint: {self.use_model} through provider: {provider}')
             llm = HuggingFaceEndpoint(
+                repo_id=f"mistralai/{self.use_model}",
                 provider=provider,
                 temperature=0.1,
                 max_new_tokens=512,
 if __name__ == "__main__":
     rag = RAG(
+        # urls = [
+        #     "https://en.wikipedia.org/wiki/Artificial_intelligence",
+        #     "https://en.wikipedia.org/wiki/Machine_learning"
+        #]
         # pdfs = ["/home/onur/WORK/DS/repos/chat_with_docs/docs/the-big-book-of-mlops-v10-072023 - Databricks.pdf"]
+        pdfs =['/home/onur/Desktop/job_app/Resume_Onur_Kerimoglu.pdf']
     )
+    response = rag.ask_QAbot("What technical skills does Onur Kerimoglu possess?")
     print(f"Question: {response['question']}")
     print(f"Answer: {response['answer']}")
     print("Sources:")