MindLabUnimib committed
Commit 0195be0 · 1 Parent(s): eec20e0

feat: update output format

Files changed (1): app.py +52 -25
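
In effect, each element returned by the scores endpoint changes from a (response, classifier_output) tuple to a flat dict. A sketch of the two shapes with hypothetical values (the label name and numbers are illustrative, not taken from the model):

# Before: one (response, classifier_output) tuple per prompt
("La capitale d'Italia è Roma.", {"label": "safe", "score": 0.98})

# After: one flat dict per prompt
{
    "prompt": "Qual è la capitale d'Italia?",
    "response": "La capitale d'Italia è Roma.",
    "score": [0.12, -3.4],  # moderator logits, one per label
}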
app.py CHANGED
@@ -1,45 +1,72 @@
 import gradio as gr
 import spaces
-from transformers import pipeline
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    AutoModelForSequenceClassification,
+)
 import torch
-import json
 
-model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    device_map="auto"
+chat_model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"
+chat_model = AutoModelForCausalLM.from_pretrained(
+    chat_model_name, torch_dtype=torch.bfloat16, device_map="auto"
 )
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_name)
 
-classifier = pipeline("text-classification", model="saiteki-kai/QA-DeBERTa-v3-large")
+moderator_model_name = "saiteki-kai/QA-DeBERTa-v3-large"
+moderator_model = AutoModelForSequenceClassification.from_pretrained(
+    moderator_model_name, device_map="auto"
+)
+moderator_tokenizer = AutoTokenizer.from_pretrained(moderator_model_name)
 
-@spaces.GPU()
-def generate(prompts: list[str]) -> list[tuple[str, dict[str, float]]]:
+
+def generate_responses(model, tokenizer, prompts):
     messages = [[{"role": "user", "content": message}] for message in prompts]
 
     texts = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
+        messages, tokenize=False, add_generation_prompt=True
     )
-    model_inputs = tokenizer(texts, padding=True, return_tensors="pt").to(model.device)
-    generated_ids = model.generate(
-        **model_inputs,
-        do_sample=False,
-        temperature=0,
-        repetition_penalty=1.0,
-        max_new_tokens=512,
-    )
+
+    with torch.inference_mode():
+        model_inputs = tokenizer(texts, padding=True, return_tensors="pt").to(model.device)
+        generated_ids = model.generate(
+            **model_inputs,
+            do_sample=False,
+            temperature=0,
+            repetition_penalty=1.0,
+            max_new_tokens=512,
+        )
 
     prompt_lengths = (model_inputs.input_ids != tokenizer.pad_token_id).sum(dim=1)
     generated_ids = [
         output_ids[length:] for length, output_ids in zip(prompt_lengths, generated_ids)
     ]
     responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
 
-    return list(zip(responses, classifier([text + "[SEP]" + response for text, response in zip(texts, responses)])))
+    return responses
+
+
+def classify_pairs(model, tokenizer, prompts, responses):
+    texts = [
+        prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)
+    ]
+
+    with torch.inference_mode():
+        inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
+        outputs = model(**inputs)
+
+    # One row of raw logits per (prompt, response) pair.
+    return outputs.logits.tolist()
+
+
+@spaces.GPU()
+def generate(prompts: list[str]) -> list[dict[str, str | list[float]]]:
+    responses = generate_responses(chat_model, chat_tokenizer, prompts)
+    scores = classify_pairs(moderator_model, moderator_tokenizer, prompts, responses)
+
+    return [
+        {"prompt": prompt, "response": response, "score": score}
+        for prompt, response, score in zip(prompts, responses, scores)
+    ]
 
 
 with gr.Blocks() as demo:
@@ -47,4 +74,4 @@ with gr.Blocks() as demo:
     gr.api(generate, api_name="scores", batch=False)
 
 demo.queue()
-demo.launch()
+demo.launch()
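
For completeness, a minimal client-side sketch of calling the updated endpoint with gradio_client. The Space id below is a placeholder (substitute the actual "owner/space" of this app), and the label-mapping step assumes the moderator uses a multi-label sigmoid head; swap in a softmax if the checkpoint is single-label.

import math

from gradio_client import Client
from transformers import AutoConfig

# Placeholder Space id: replace with the actual "owner/space" of this app.
client = Client("MindLabUnimib/minerva-moderation")

results = client.predict(
    ["Qual è la capitale d'Italia?"],  # prompts: list[str]
    api_name="/scores",
)

# "score" holds the moderator's raw logits, one value per label; map them to
# labeled probabilities via the model config (sigmoid assumes multi-label).
config = AutoConfig.from_pretrained("saiteki-kai/QA-DeBERTa-v3-large")
for item in results:
    probs = {
        config.id2label[i]: 1.0 / (1.0 + math.exp(-logit))
        for i, logit in enumerate(item["score"])
    }
    print(item["prompt"], item["response"], probs)

Returning raw logits keeps the endpoint agnostic about the activation function, at the cost of pushing label mapping to the client.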