apehex committed on
Commit
2431602
·
1 Parent(s): 170fb4c

Rebrand the app as a human / LLM detector.

Browse files
Files changed (2) hide show
  1. README.md +7 -7
  2. app.py +27 -101
README.md CHANGED
@@ -1,20 +1,20 @@
1
  ---
2
  pinned: false
3
- title: 'Surprisal Scores'
4
- emoji: '😳'
5
- colorFrom: 'gray'
6
  colorTo: 'red'
7
  sdk: 'gradio'
8
- sdk_version: '6.4.0'
9
  app_file: 'app.py'
10
  license: 'agpl-3.0'
11
- short_description: 'Plot a few metrics to measure how unexpected each token is.'
12
  models: ['openai/gpt-oss-20b']
13
  ---
14
 
15
- ## psAI-Ops: Surprisal Scores <img src="images/logo.png" alt="apehex logo" width="32" height="32">
16
 
17
- Plot a few metrics to measure how unexpected each token is.
18
 
19
  Licensed under the [aGPLv3][github-license].
20
 
 
1
  ---
2
  pinned: false
3
+ title: 'Human Scores'
4
+ emoji: '🤖'
5
+ colorFrom: 'green'
6
  colorTo: 'red'
7
  sdk: 'gradio'
8
+ sdk_version: '6.8.0'
9
  app_file: 'app.py'
10
  license: 'agpl-3.0'
11
+ short_description: 'Parse LLM / human sections in text samples.'
12
  models: ['openai/gpt-oss-20b']
13
  ---
14
 
15
+ ## psAI-Ops: Human Scores <img src="images/logo.png" alt="apehex logo" width="32" height="32">
16
 
17
+ Leverage an open-source LLM as a critic to tell apart the text sections written by a human from those generated by an AI.
18
 
19
  Licensed under the [aGPLv3][github-license].
20
 
app.py CHANGED
@@ -6,7 +6,7 @@ import spaces
6
  import psaiops.common.model
7
  import psaiops.common.style
8
  import psaiops.common.tokenizer
9
- import psaiops.score.surprisal.app as _app
10
 
11
  # META #########################################################################
12
 
@@ -14,9 +14,6 @@ import psaiops.score.surprisal.app as _app
14
  psaiops.common.model.get_model(name=_app.MODEL, device='cpu')
15
 
16
  # but do not instantiate unless necessary
17
- _LAYERS = {
18
- 'norm': None,
19
- 'head': None,}
20
  _MODELS = {
21
  'cpu': None,
22
  'cuda': None,}
@@ -31,9 +28,6 @@ def fetch_model(device: str='cuda') -> object:
31
  # control when the model is downloaded to avoid moving it to the CPU
32
  if _MODELS.get(device, None) is None:
33
  _MODELS[device] = psaiops.common.model.get_model(name=_app.MODEL, device=device)
34
- # move specific layers to the CPU for the computation of the metrics
35
- _LAYERS['norm'] = copy.deepcopy(_MODELS[device].model.norm).cpu()
36
- _LAYERS['head'] = copy.deepcopy(_MODELS[device].lm_head).cpu()
37
  # tuple of objects or (None, None)
38
  return _MODELS.get(device, None)
39
 
@@ -67,113 +61,45 @@ def compute_states(
67
  model_obj=__model,
68
  tokenizer_obj=__tokenizer)
69
 
70
- # PROBABILITIES ################################################################
71
 
72
- def score_probs(
73
- output_data: object,
74
- hidden_data: object,
75
- ) -> list:
76
- # exit if the model has not yet been loaded
77
- if _LAYERS.get('head', None) is None:
78
- return []
79
- # do not download the model without the GPU wrapper
80
- __tokenizer = fetch_tokenizer(device='cpu')
81
- # fill all the arguments that cannot be pickled
82
- return _app.update_prob_scores(
83
- output_data=output_data,
84
- hidden_data=hidden_data,
85
- tokenizer_obj=__tokenizer,
86
- head_obj=_LAYERS['head'],)
87
-
88
- def plot_probs(
89
- output_data: object,
90
- hidden_data: object,
91
  ) -> object:
92
- # exit if the model has not yet been loaded
93
- if _LAYERS.get('head', None) is None:
94
- return None
95
- # fill all the arguments that cannot be pickled
96
- return _app.update_prob_plot(
97
- output_data=output_data,
98
- hidden_data=hidden_data,
99
- head_obj=_LAYERS['head'],)
100
-
101
- # RANKS ########################################################################
102
-
103
- def score_ranks(
104
- output_data: object,
105
- hidden_data: object,
106
- ) -> list:
107
- # exit if the model has not yet been loaded
108
- if _LAYERS.get('head', None) is None:
109
- return []
110
  # do not download the model without the GPU wrapper
111
  __tokenizer = fetch_tokenizer(device='cpu')
112
  # fill all the arguments that cannot be pickled
113
- return _app.update_rank_scores(
114
- output_data=output_data,
115
- hidden_data=hidden_data,
116
- tokenizer_obj=__tokenizer,
117
- head_obj=_LAYERS['head'],)
118
-
119
- def plot_ranks(
120
- output_data: object,
121
- hidden_data: object,
122
  ) -> object:
123
- # exit if the model has not yet been loaded
124
- if _LAYERS.get('head', None) is None:
125
- return None
126
- # fill all the arguments that cannot be pickled
127
- return _app.update_rank_plot(
128
- output_data=output_data,
129
- hidden_data=hidden_data,
130
- head_obj=_LAYERS['head'],)
131
-
132
- # JSD ##########################################################################
133
-
134
- def score_jsd(
135
- layer_idx: float,
136
- output_data: object,
137
- hidden_data: object,
138
- ) -> list:
139
- # exit if the model has not yet been loaded
140
- if (_LAYERS.get('head', None) is None) or (_LAYERS.get('norm', None) is None):
141
- return []
142
  # do not download the model without the GPU wrapper
143
  __tokenizer = fetch_tokenizer(device='cpu')
144
  # fill all the arguments that cannot be pickled
145
- return _app.update_jsd_scores(
146
- layer_idx=layer_idx,
147
- output_data=output_data,
148
- hidden_data=hidden_data,
149
- tokenizer_obj=__tokenizer,
150
- head_obj=_LAYERS['head'],
151
- norm_obj=_LAYERS['norm'],)
152
-
153
- def plot_jsd(
154
- layer_idx: float,
155
- hidden_data: object,
156
  ) -> object:
157
- # exit if the model has not yet been loaded
158
- if _LAYERS.get('head', None) is None:
159
- return None
160
  # fill all the arguments that cannot be pickled
161
- return _app.update_jsd_plot(
162
- layer_idx=layer_idx,
163
- hidden_data=hidden_data,
164
- head_obj=_LAYERS['head'],
165
- norm_obj=_LAYERS['norm'],)
166
 
167
  # MAIN #########################################################################
168
 
169
- demo = _app.create_app(
170
- compute=compute_states,
171
- prob_score=score_probs,
172
- prob_plot=plot_probs,
173
- rank_score=score_ranks,
174
- rank_plot=plot_ranks,
175
- jsd_score=score_jsd,
176
- jsd_plot=plot_jsd)
177
 
178
  demo.queue()
179
- demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.BUTTON)
 
6
  import psaiops.common.model
7
  import psaiops.common.style
8
  import psaiops.common.tokenizer
9
+ import psaiops.score.human.app as _app
10
 
11
  # META #########################################################################
12
 
 
14
  psaiops.common.model.get_model(name=_app.MODEL, device='cpu')
15
 
16
  # but do not instantiate unless necessary
 
 
 
17
  _MODELS = {
18
  'cpu': None,
19
  'cuda': None,}
 
28
  # control when the model is downloaded to avoid moving it to the CPU
29
  if _MODELS.get(device, None) is None:
30
  _MODELS[device] = psaiops.common.model.get_model(name=_app.MODEL, device=device)
 
 
 
31
  # tuple of objects or (None, None)
32
  return _MODELS.get(device, None)
33
 
 
61
  model_obj=__model,
62
  tokenizer_obj=__tokenizer)
63
 
64
+ # TOKENS #######################################################################
65
 
66
+ def compute_tokens(
67
+ prompt_str: str,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  ) -> object:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # do not download the model without the GPU wrapper
70
  __tokenizer = fetch_tokenizer(device='cpu')
71
  # fill all the arguments that cannot be pickled
72
+ return _app.update_tokens_state(
73
+ prompt_str=prompt_str,
74
+ tokenizer_obj=__tokenizer)
75
+
76
+ # INDICES ######################################################################
77
+
78
+ def compute_indices(
79
+ prompt_str: str,
 
80
  ) -> object:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  # do not download the model without the GPU wrapper
82
  __tokenizer = fetch_tokenizer(device='cpu')
83
  # fill all the arguments that cannot be pickled
84
+ return _app.update_indices_state(
85
+ prompt_str=prompt_str,
86
+ tokenizer_obj=__tokenizer)
87
+
88
+ # LOGITS #######################################################################
89
+
90
+ def compute_logits(
91
+ indices_arr: object,
 
 
 
92
  ) -> object:
93
+ # load the model and tokenizer inside the GPU wrapper
94
+ __model = fetch_model(device='cuda')
 
95
  # fill all the arguments that cannot be pickled
96
+ return _app.update_logits_state(
97
+ indices_arr=indices_arr,
98
+ model_obj=__model)
 
 
99
 
100
  # MAIN #########################################################################
101
 
102
+ demo = _app.create_app(partition=compute_tokens, convert=compute_indices, compute=compute_logits)
 
 
 
 
 
 
 
103
 
104
  demo.queue()
105
+ demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.ALL, debug=True)