Chamaka8 committed on
Commit
6154c88
·
verified ·
1 Parent(s): 0cdd9bb

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +31 -37
app.py CHANGED
@@ -21,6 +21,13 @@ SENTIMENT_CLASSES = ["Positive", "Negative", "Neutral"]
21
 
22
  print(f"===== Startup at {datetime.datetime.now()} =====")
23
 
 
 
 
 
 
 
 
24
  print("Loading tokenizer...")
25
  tokenizer = PreTrainedTokenizerFast.from_pretrained(TOK_MODEL, token=HF_TOKEN)
26
  tokenizer.pad_token = tokenizer.eos_token
@@ -28,43 +35,30 @@ tokenizer.pad_token_id = 0
28
  tokenizer.padding_side = "right"
29
  print("Tokenizer ready")
30
 
31
- print("Loading base model in 4-bit...")
32
- bnb = BitsAndBytesConfig(
33
- load_in_4bit=True,
34
- bnb_4bit_quant_type="nf4",
35
- bnb_4bit_compute_dtype=torch.float16,
36
- bnb_4bit_use_double_quant=True
37
- )
38
- base = AutoModelForCausalLM.from_pretrained(
39
- BASE_MODEL,
40
- quantization_config=bnb,
41
- device_map="cpu",
42
- token=HF_TOKEN
43
- )
44
- print("Base model ready")
45
-
46
- print("Loading classifier heads...")
47
-
48
- news_model = PeftModel.from_pretrained(base, NEWS_ADAPTER, token=HF_TOKEN)
49
- news_model.eval()
50
- news_head_path = hf_hub_download(repo_id=NEWS_ADAPTER, filename="classifier_head.pt", token=HF_TOKEN)
51
- news_head = nn.Linear(4096, len(NEWS_CLASSES))
52
- news_head.load_state_dict(torch.load(news_head_path, map_location="cpu"))
53
- news_head.eval()
54
-
55
- writing_model = PeftModel.from_pretrained(base, WRITING_ADAPTER, token=HF_TOKEN)
56
- writing_model.eval()
57
- writing_head_path = hf_hub_download(repo_id=WRITING_ADAPTER, filename="classifier_head.pt", token=HF_TOKEN)
58
- writing_head = nn.Linear(4096, len(WRITING_CLASSES))
59
- writing_head.load_state_dict(torch.load(writing_head_path, map_location="cpu"))
60
- writing_head.eval()
61
-
62
- sentiment_model = PeftModel.from_pretrained(base, SENTIMENT_ADAPTER, token=HF_TOKEN)
63
- sentiment_model.eval()
64
- sentiment_head_path = hf_hub_download(repo_id=SENTIMENT_ADAPTER, filename="classifier_head.pt", token=HF_TOKEN)
65
- sentiment_head = nn.Linear(4096, len(SENTIMENT_CLASSES))
66
- sentiment_head.load_state_dict(torch.load(sentiment_head_path, map_location="cpu"))
67
- sentiment_head.eval()
68
 
69
  print("All models ready!")
70
 
 
21
 
22
  print(f"===== Startup at {datetime.datetime.now()} =====")
23
 
24
+ bnb = BitsAndBytesConfig(
25
+ load_in_4bit=True,
26
+ bnb_4bit_quant_type="nf4",
27
+ bnb_4bit_compute_dtype=torch.float16,
28
+ bnb_4bit_use_double_quant=True
29
+ )
30
+
31
  print("Loading tokenizer...")
32
  tokenizer = PreTrainedTokenizerFast.from_pretrained(TOK_MODEL, token=HF_TOKEN)
33
  tokenizer.pad_token = tokenizer.eos_token
 
35
  tokenizer.padding_side = "right"
36
  print("Tokenizer ready")
37
 
38
+ def load_model_and_head(adapter_repo, num_classes):
39
+ print(f"Loading {adapter_repo}...")
40
+ base = AutoModelForCausalLM.from_pretrained(
41
+ BASE_MODEL,
42
+ quantization_config=bnb,
43
+ device_map="cpu",
44
+ token=HF_TOKEN
45
+ )
46
+ model = PeftModel.from_pretrained(base, adapter_repo, token=HF_TOKEN)
47
+ model.eval()
48
+ head_path = hf_hub_download(
49
+ repo_id=adapter_repo,
50
+ filename="classifier_head.pt",
51
+ token=HF_TOKEN
52
+ )
53
+ head = nn.Linear(4096, num_classes)
54
+ head.load_state_dict(torch.load(head_path, map_location="cpu"))
55
+ head.eval()
56
+ print(f"{adapter_repo} ready")
57
+ return model, head
58
+
59
+ news_model, news_head = load_model_and_head(NEWS_ADAPTER, 5)
60
+ writing_model, writing_head = load_model_and_head(WRITING_ADAPTER, 4)
61
+ sentiment_model, sentiment_head = load_model_and_head(SENTIMENT_ADAPTER, 3)
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  print("All models ready!")
64