Spaces:
Paused
Paused
Tao Wu committed on
Commit ·
032427b
1
Parent(s): 26cf43b
quantization
Browse files
- app/embedding_setup.py +8 -4
- requirements.txt +0 -0
app/embedding_setup.py
CHANGED
|
@@ -2,7 +2,7 @@ from langchain_community.vectorstores import Chroma
|
|
| 2 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 3 |
|
| 4 |
from langchain.docstore.document import Document
|
| 5 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
| 6 |
from peft import PeftModel
|
| 7 |
from config import *
|
| 8 |
import os
|
|
@@ -34,9 +34,13 @@ retriever = db.as_retriever(search_kwargs={"k": TOP_K})
|
|
| 34 |
lora_weights_rec = REC_LORA_MODEL
|
| 35 |
lora_weights_exp = EXP_LORA_MODEL
|
| 36 |
hf_auth = os.environ.get("hf_token")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
|
| 40 |
|
| 41 |
|
| 42 |
first_token = 'First'
|
|
@@ -130,7 +134,7 @@ def compare_docs_with_context(doc_a, doc_b, target_occupation_name, target_occup
|
|
| 130 |
|
| 131 |
#courses = f"First: name: {doc_a.metadata['name']} description:{doc_a.metadata['description']} Second: name: {doc_b.metadata['name']} description:{Sdoc_b.metadata['description']}"
|
| 132 |
courses = f"First: name: {doc_a.metadata['name']} learning outcomes:{doc_a.metadata['skills'][:2000]} Second: name: {doc_b.metadata['name']} learning outcomes:{doc_b.metadata['skills'][:2000]}"
|
| 133 |
-
target_occupation = f"name: {target_occupation_name} description: {target_occupation_dsp}"
|
| 134 |
skill_gap = skill_gap
|
| 135 |
prompt = generate_prompt(target_occupation, skill_gap, courses)
|
| 136 |
prompt = [prompt]
|
|
|
|
| 2 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 3 |
|
| 4 |
from langchain.docstore.document import Document
|
| 5 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig,BitsAndBytesConfig
|
| 6 |
from peft import PeftModel
|
| 7 |
from config import *
|
| 8 |
import os
|
|
|
|
| 34 |
lora_weights_rec = REC_LORA_MODEL
|
| 35 |
lora_weights_exp = EXP_LORA_MODEL
|
| 36 |
hf_auth = os.environ.get("hf_token")
|
| 37 |
+
quantization_config = BitsAndBytesConfig(
|
| 38 |
+
load_in_4bit=True,
|
| 39 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 40 |
+
bnb_4bit_quant_type="nf4"
|
| 41 |
+
)
|
| 42 |
|
| 43 |
+
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, quantization_config=quantization_config, token=hf_auth)
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
first_token = 'First'
|
|
|
|
| 134 |
|
| 135 |
#courses = f"First: name: {doc_a.metadata['name']} description:{doc_a.metadata['description']} Second: name: {doc_b.metadata['name']} description:{Sdoc_b.metadata['description']}"
|
| 136 |
courses = f"First: name: {doc_a.metadata['name']} learning outcomes:{doc_a.metadata['skills'][:2000]} Second: name: {doc_b.metadata['name']} learning outcomes:{doc_b.metadata['skills'][:2000]}"
|
| 137 |
+
target_occupation = f"name: {target_occupation_name} description: {target_occupation_dsp[:2000]}"
|
| 138 |
skill_gap = skill_gap
|
| 139 |
prompt = generate_prompt(target_occupation, skill_gap, courses)
|
| 140 |
prompt = [prompt]
|
requirements.txt
CHANGED
|
Binary files a/requirements.txt and b/requirements.txt differ
|
|
|