MindLabUnimib committed · verified
Commit 2d8a10d · Parent: f78de35

Update app.py

Files changed (1): app.py (+54 −55)
app.py CHANGED
@@ -1,79 +1,79 @@
 import torch
 import spaces
-import logging
 import gradio as gr
 
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    AutoModelForSequenceClassification,
-)
+# from transformers import (
+#     AutoModelForCausalLM,
+#     AutoTokenizer,
+#     AutoModelForSequenceClassification,
+# )
 
-logging.basicConfig(level=logging.INFO)
-
-logger = logging.getLogger(__name__)
-
-
-chat_model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"
-chat_model = AutoModelForCausalLM.from_pretrained(chat_model_name, dtype=torch.bfloat16, device_map="cpu")
-chat_model.to("cuda")
-chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_name)
+# chat_model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"
+# chat_model = AutoModelForCausalLM.from_pretrained(chat_model_name, dtype=torch.bfloat16, device_map="cpu")
+# chat_model.to("cuda")
+# chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_name)
 
-moderator_model_name = "saiteki-kai/QA-DeBERTa-v3-large-binary-3"
-moderator_model = AutoModelForSequenceClassification.from_pretrained(moderator_model_name, device_map="cpu")
-moderator_model.to("cuda")
-moderator_tokenizer = AutoTokenizer.from_pretrained(moderator_model_name, padding_side="right")
+# moderator_model_name = "saiteki-kai/QA-DeBERTa-v3-large-binary-3"
+# moderator_model = AutoModelForSequenceClassification.from_pretrained(moderator_model_name, device_map="cpu")
+# moderator_model.to("cuda")
+# moderator_tokenizer = AutoTokenizer.from_pretrained(moderator_model_name, padding_side="right")
 
-def generate_responses(model, tokenizer, prompts):
-    messages = [[{"role": "user", "content": message}] for message in prompts]
+# def generate_responses(model, tokenizer, prompts):
+#     messages = [[{"role": "user", "content": message}] for message in prompts]
 
-    texts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    model_inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
+#     texts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+#     model_inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
 
-    with torch.inference_mode():
-        generated_ids = model.generate(
-            **model_inputs,
-            do_sample=False,
-            temperature=0,
-            repetition_penalty=1.1,
-            max_new_tokens=512,
-        )
-    prompt_lengths = model_inputs["attention_mask"].sum(dim=1) - 1
-    generated_ids = [
-        output_ids[length:] for length, output_ids in zip(prompt_lengths, generated_ids)
-    ]
-    responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+#     with torch.inference_mode():
+#         generated_ids = model.generate(
+#             **model_inputs,
+#             do_sample=False,
+#             temperature=0,
+#             repetition_penalty=1.1,
+#             max_new_tokens=512,
+#         )
+#     prompt_lengths = model_inputs["attention_mask"].sum(dim=1) - 1
+#     generated_ids = [
+#         output_ids[length:] for length, output_ids in zip(prompt_lengths, generated_ids)
+#     ]
+#     responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
 
-    return responses
+#     return responses
 
-def classify_pairs(model, tokenizer, prompts, responses):
-    texts = [
-        prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)
-    ]
+# def classify_pairs(model, tokenizer, prompts, responses):
+#     texts = [
+#         prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)
+#     ]
 
-    input_ids = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
-    print(tokenizer.batch_decode(input_ids["input_ids"]))
+#     input_ids = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
+#     print(tokenizer.batch_decode(input_ids["input_ids"]))
 
-    with torch.inference_mode():
-        outputs = model(**input_ids)
-    scores = torch.softmax(outputs.logits, dim=-1).detach().cpu()
-    unsafety_scores = [float(s[1]) for s in scores]  # get unsafe axis
+#     with torch.inference_mode():
+#         outputs = model(**input_ids)
+#     scores = torch.softmax(outputs.logits, dim=-1).detach().cpu()
+#     unsafety_scores = [float(s[1]) for s in scores]  # get unsafe axis
 
-    return unsafety_scores
+#     return unsafety_scores
 
 
 @spaces.GPU(duration=120)
 def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, str | float]]:
     print("GENERATE")
 
-    ids = [s["id"] for s in submission]
-    prompts = [s["prompt"] for s in submission]
+    # ids = [s["id"] for s in submission]
+    # prompts = [s["prompt"] for s in submission]
 
-    responses = generate_responses(chat_model, chat_tokenizer, prompts)
-    print(responses)
+    # responses = generate_responses(chat_model, chat_tokenizer, prompts)
+    # print(responses)
 
-    scores = classify_pairs(moderator_model, moderator_tokenizer, prompts, responses)
-    print(scores)
+    # scores = classify_pairs(moderator_model, moderator_tokenizer, prompts, responses)
+    # print(scores)
 
+    chat_model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"
+    ids = [s["id"] for s in submission]
+    prompts = [s["prompt"] for s in submission]
+    responses = ["This is a placeholder response." for _ in prompts]
+    scores = [0.5 for _ in prompts]
+
     outputs = [
         {"id": id, "prompt": prompt, "response": response, "score": score, "model": chat_model_name, "team_id": team_id}
@@ -85,7 +85,6 @@ def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, s
 
 with gr.Blocks() as demo:
     print("START")
-    gr.Markdown("Welcome")
     gr.api(generate, api_name="scores", concurrency_limit=None, batch=False)
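The `scores` endpoint registered via `gr.api` stays callable even with the pipeline stubbed out, which makes it easy to verify the request/response contract end to end. A minimal client-side sketch, assuming the Space is public and `gradio_client` is installed; the Space id and team id below are placeholders, not values from this repo:

# Hypothetical client call against the /scores endpoint exposed by gr.api.
from gradio_client import Client

client = Client("MindLabUnimib/<space-name>")  # placeholder Space id, replace before running
result = client.predict(
    [{"id": "1", "prompt": "Hello, how are you?"}],  # submission
    "demo-team",                                     # team_id (placeholder)
    api_name="/scores",
)
# With this commit's stub, every item comes back with
# response="This is a placeholder response." and score=0.5.
print(result)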
 
 
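Since the real pipeline is only commented out, restoring it means uncommenting roughly fifty lines. A lighter-weight alternative, sketched under the assumption that the commented-out model code and helpers are restored: gate the stub behind a module-level flag (the name USE_PLACEHOLDER is hypothetical, not something in this repo), so switching between debug and real inference is a one-line change:

import spaces

USE_PLACEHOLDER = True  # hypothetical flag; set False once the model code is uncommented
chat_model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"

@spaces.GPU(duration=120)
def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, str | float]]:
    ids = [s["id"] for s in submission]
    prompts = [s["prompt"] for s in submission]

    if USE_PLACEHOLDER:
        # Same placeholder values this commit hard-codes.
        responses = ["This is a placeholder response." for _ in prompts]
        scores = [0.5 for _ in prompts]
    else:
        # generate_responses / classify_pairs are the helpers commented out above.
        responses = generate_responses(chat_model, chat_tokenizer, prompts)
        scores = classify_pairs(moderator_model, moderator_tokenizer, prompts, responses)

    # Mirrors the app's `outputs` list; the diff truncates that comprehension at
    # the hunk boundary, so the zip below is a reconstruction, not verbatim.
    return [
        {"id": i, "prompt": p, "response": r, "score": s, "model": chat_model_name, "team_id": team_id}
        for i, p, r, s in zip(ids, prompts, responses, scores)
    ]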