Update app.py
app.py CHANGED
@@ -22,7 +22,7 @@ import tqdm
 import accelerate
 import re
 
-
+api_key = os.getenv('API_KEY')
 
 
 # default_persist_directory = './chroma_HF/'
@@ -111,7 +111,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             max_new_tokens = max_tokens,
             top_k = top_k,
             load_in_8bit = True,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
         raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
@@ -120,7 +120,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     elif llm_model == "microsoft/phi-2":
         # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
@@ -132,7 +132,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             top_k = top_k,
             trust_remote_code = True,
             torch_dtype = "auto",
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
         llm = HuggingFaceEndpoint(
@@ -141,7 +141,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = 250,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
         raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
@@ -151,7 +151,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
     else:
         llm = HuggingFaceEndpoint(
@@ -161,7 +161,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
-            huggingfacehub_api_token =
+            huggingfacehub_api_token = 'api_key',
         )
 
     progress(0.75, desc="Defining buffer memory...")
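Note that the commit reads the token from the Space's secrets (`api_key = os.getenv('API_KEY')`) but then passes the quoted string `'api_key'` to every `HuggingFaceEndpoint(...)` call, so the endpoint receives the literal text `api_key` rather than the token, and authenticated requests would fail. A minimal sketch of the presumably intended wiring, assuming `import os` at the top of app.py and the `langchain_community` import path (the `repo_id` and sampling values below are placeholders, not from this diff):

```python
import os

# Import path assumed; this app.py already constructs HuggingFaceEndpoint objects.
from langchain_community.llms import HuggingFaceEndpoint

# Read the token from the environment (set API_KEY as a secret in the
# Space settings) instead of hard-coding it in app.py.
api_key = os.getenv('API_KEY')

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # placeholder repo id
    temperature=0.7,
    max_new_tokens=1024,
    top_k=3,
    huggingfacehub_api_token=api_key,  # the variable, not the string 'api_key'
)
```

Also worth guarding against: `os.getenv('API_KEY')` returns `None` when the secret is unset, so a check such as `if api_key is None: raise gr.Error(...)` before building the endpoint would surface a clearer error than a failed authenticated call.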