Update app.py
app.py CHANGED
@@ -22,7 +22,7 @@ import tqdm
 import accelerate
 import re
 
-
+api_key = os.getenv('API_KEY')
 
 
 # default_persist_directory = './chroma_HF/'
@@ -111,7 +111,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             max_new_tokens = max_tokens,
             top_k = top_k,
             load_in_8bit = True,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
         raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
@@ -120,7 +120,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     elif llm_model == "microsoft/phi-2":
         # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
@@ -132,7 +132,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             top_k = top_k,
             trust_remote_code = True,
             torch_dtype = "auto",
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
         llm = HuggingFaceEndpoint(
@@ -141,7 +141,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = 250,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
         raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
@@ -151,7 +151,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     else:
         llm = HuggingFaceEndpoint(
@@ -161,7 +161,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
 
     progress(0.75, desc="Defining buffer memory...")
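Note that the commit reads the token from the Space's secrets (`api_key = os.getenv('API_KEY')`) but then passes the quoted string `'api_key'` to every `HuggingFaceEndpoint(...)` call, so the endpoint receives the literal text `api_key` rather than the token, and authenticated requests would fail. A minimal sketch of the presumably intended wiring, assuming `import os` at the top of app.py and the `langchain_community` import path (the `repo_id` and sampling values below are placeholders, not from this diff):

```python
import os

# Import path assumed; this app.py already constructs HuggingFaceEndpoint objects.
from langchain_community.llms import HuggingFaceEndpoint

# Read the token from the environment (set API_KEY as a secret in the
# Space settings) instead of hard-coding it in app.py.
api_key = os.getenv('API_KEY')

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # placeholder repo id
    temperature=0.7,
    max_new_tokens=1024,
    top_k=3,
    huggingfacehub_api_token=api_key,  # the variable, not the string 'api_key'
)
```

Also worth guarding against: `os.getenv('API_KEY')` returns `None` when the secret is unset, so a check such as `if api_key is None: raise gr.Error(...)` before building the endpoint would surface a clearer error than a failed authenticated call.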