Spaces:
Running on Zero
Running on Zero
apehex committed on
Commit ·
2431602
1
Parent(s): 170fb4c
Rebrand the app as a human / LLM detector.
Browse files
README.md
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
---
|
| 2 |
pinned: false
|
| 3 |
-
title: '
|
| 4 |
-
emoji: '
|
| 5 |
-
colorFrom: '
|
| 6 |
colorTo: 'red'
|
| 7 |
sdk: 'gradio'
|
| 8 |
-
sdk_version: '6.
|
| 9 |
app_file: 'app.py'
|
| 10 |
license: 'agpl-3.0'
|
| 11 |
-
short_description: '
|
| 12 |
models: ['openai/gpt-oss-20b']
|
| 13 |
---
|
| 14 |
|
| 15 |
-
## psAI-Ops:
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
Licensed under the [aGPLv3][github-license].
|
| 20 |
|
|
|
|
| 1 |
---
|
| 2 |
pinned: false
|
| 3 |
+
title: 'Human Scores'
|
| 4 |
+
emoji: '🤖'
|
| 5 |
+
colorFrom: 'green'
|
| 6 |
colorTo: 'red'
|
| 7 |
sdk: 'gradio'
|
| 8 |
+
sdk_version: '6.8.0'
|
| 9 |
app_file: 'app.py'
|
| 10 |
license: 'agpl-3.0'
|
| 11 |
+
short_description: 'Parse LLM / human sections in text samples.'
|
| 12 |
models: ['openai/gpt-oss-20b']
|
| 13 |
---
|
| 14 |
|
| 15 |
+
## psAI-Ops: Human Scores <img src="images/logo.png" alt="apehex logo" width="32" height="32">
|
| 16 |
|
| 17 |
+
Leverage an open source LLM as critic to take apart the text sections written by a human from those generated by an AI.
|
| 18 |
|
| 19 |
Licensed under the [aGPLv3][github-license].
|
| 20 |
|
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import spaces
|
|
| 6 |
import psaiops.common.model
|
| 7 |
import psaiops.common.style
|
| 8 |
import psaiops.common.tokenizer
|
| 9 |
-
import psaiops.score.
|
| 10 |
|
| 11 |
# META #########################################################################
|
| 12 |
|
|
@@ -14,9 +14,6 @@ import psaiops.score.surprisal.app as _app
|
|
| 14 |
psaiops.common.model.get_model(name=_app.MODEL, device='cpu')
|
| 15 |
|
| 16 |
# but do not instantiate unless necessary
|
| 17 |
-
_LAYERS = {
|
| 18 |
-
'norm': None,
|
| 19 |
-
'head': None,}
|
| 20 |
_MODELS = {
|
| 21 |
'cpu': None,
|
| 22 |
'cuda': None,}
|
|
@@ -31,9 +28,6 @@ def fetch_model(device: str='cuda') -> object:
|
|
| 31 |
# control when the model is downloaded to avoid moving it to the CPU
|
| 32 |
if _MODELS.get(device, None) is None:
|
| 33 |
_MODELS[device] = psaiops.common.model.get_model(name=_app.MODEL, device=device)
|
| 34 |
-
# move specific layers to the CPU for the computation of the metrics
|
| 35 |
-
_LAYERS['norm'] = copy.deepcopy(_MODELS[device].model.norm).cpu()
|
| 36 |
-
_LAYERS['head'] = copy.deepcopy(_MODELS[device].lm_head).cpu()
|
| 37 |
# tuple of objects or (None, None)
|
| 38 |
return _MODELS.get(device, None)
|
| 39 |
|
|
@@ -67,113 +61,45 @@ def compute_states(
|
|
| 67 |
model_obj=__model,
|
| 68 |
tokenizer_obj=__tokenizer)
|
| 69 |
|
| 70 |
-
#
|
| 71 |
|
| 72 |
-
def
|
| 73 |
-
|
| 74 |
-
hidden_data: object,
|
| 75 |
-
) -> list:
|
| 76 |
-
# exit if the model has not yet been loaded
|
| 77 |
-
if _LAYERS.get('head', None) is None:
|
| 78 |
-
return []
|
| 79 |
-
# do not download the model without the GPU wrapper
|
| 80 |
-
__tokenizer = fetch_tokenizer(device='cpu')
|
| 81 |
-
# fill all the arguments that cannot be pickled
|
| 82 |
-
return _app.update_prob_scores(
|
| 83 |
-
output_data=output_data,
|
| 84 |
-
hidden_data=hidden_data,
|
| 85 |
-
tokenizer_obj=__tokenizer,
|
| 86 |
-
head_obj=_LAYERS['head'],)
|
| 87 |
-
|
| 88 |
-
def plot_probs(
|
| 89 |
-
output_data: object,
|
| 90 |
-
hidden_data: object,
|
| 91 |
) -> object:
|
| 92 |
-
# exit if the model has not yet been loaded
|
| 93 |
-
if _LAYERS.get('head', None) is None:
|
| 94 |
-
return None
|
| 95 |
-
# fill all the arguments that cannot be pickled
|
| 96 |
-
return _app.update_prob_plot(
|
| 97 |
-
output_data=output_data,
|
| 98 |
-
hidden_data=hidden_data,
|
| 99 |
-
head_obj=_LAYERS['head'],)
|
| 100 |
-
|
| 101 |
-
# RANKS ########################################################################
|
| 102 |
-
|
| 103 |
-
def score_ranks(
|
| 104 |
-
output_data: object,
|
| 105 |
-
hidden_data: object,
|
| 106 |
-
) -> list:
|
| 107 |
-
# exit if the model has not yet been loaded
|
| 108 |
-
if _LAYERS.get('head', None) is None:
|
| 109 |
-
return []
|
| 110 |
# do not download the model without the GPU wrapper
|
| 111 |
__tokenizer = fetch_tokenizer(device='cpu')
|
| 112 |
# fill all the arguments that cannot be pickled
|
| 113 |
-
return _app.
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
def
|
| 120 |
-
|
| 121 |
-
hidden_data: object,
|
| 122 |
) -> object:
|
| 123 |
-
# exit if the model has not yet been loaded
|
| 124 |
-
if _LAYERS.get('head', None) is None:
|
| 125 |
-
return None
|
| 126 |
-
# fill all the arguments that cannot be pickled
|
| 127 |
-
return _app.update_rank_plot(
|
| 128 |
-
output_data=output_data,
|
| 129 |
-
hidden_data=hidden_data,
|
| 130 |
-
head_obj=_LAYERS['head'],)
|
| 131 |
-
|
| 132 |
-
# JSD ##########################################################################
|
| 133 |
-
|
| 134 |
-
def score_jsd(
|
| 135 |
-
layer_idx: float,
|
| 136 |
-
output_data: object,
|
| 137 |
-
hidden_data: object,
|
| 138 |
-
) -> list:
|
| 139 |
-
# exit if the model has not yet been loaded
|
| 140 |
-
if (_LAYERS.get('head', None) is None) or (_LAYERS.get('norm', None) is None):
|
| 141 |
-
return []
|
| 142 |
# do not download the model without the GPU wrapper
|
| 143 |
__tokenizer = fetch_tokenizer(device='cpu')
|
| 144 |
# fill all the arguments that cannot be pickled
|
| 145 |
-
return _app.
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
def plot_jsd(
|
| 154 |
-
layer_idx: float,
|
| 155 |
-
hidden_data: object,
|
| 156 |
) -> object:
|
| 157 |
-
#
|
| 158 |
-
|
| 159 |
-
return None
|
| 160 |
# fill all the arguments that cannot be pickled
|
| 161 |
-
return _app.
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
head_obj=_LAYERS['head'],
|
| 165 |
-
norm_obj=_LAYERS['norm'],)
|
| 166 |
|
| 167 |
# MAIN #########################################################################
|
| 168 |
|
| 169 |
-
demo = _app.create_app(
|
| 170 |
-
compute=compute_states,
|
| 171 |
-
prob_score=score_probs,
|
| 172 |
-
prob_plot=plot_probs,
|
| 173 |
-
rank_score=score_ranks,
|
| 174 |
-
rank_plot=plot_ranks,
|
| 175 |
-
jsd_score=score_jsd,
|
| 176 |
-
jsd_plot=plot_jsd)
|
| 177 |
|
| 178 |
demo.queue()
|
| 179 |
-
demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.
|
|
|
|
| 6 |
import psaiops.common.model
|
| 7 |
import psaiops.common.style
|
| 8 |
import psaiops.common.tokenizer
|
| 9 |
+
import psaiops.score.human.app as _app
|
| 10 |
|
| 11 |
# META #########################################################################
|
| 12 |
|
|
|
|
| 14 |
psaiops.common.model.get_model(name=_app.MODEL, device='cpu')
|
| 15 |
|
| 16 |
# but do not instantiate unless necessary
|
|
|
|
|
|
|
|
|
|
| 17 |
_MODELS = {
|
| 18 |
'cpu': None,
|
| 19 |
'cuda': None,}
|
|
|
|
| 28 |
# control when the model is downloaded to avoid moving it to the CPU
|
| 29 |
if _MODELS.get(device, None) is None:
|
| 30 |
_MODELS[device] = psaiops.common.model.get_model(name=_app.MODEL, device=device)
|
|
|
|
|
|
|
|
|
|
| 31 |
# tuple of objects or (None, None)
|
| 32 |
return _MODELS.get(device, None)
|
| 33 |
|
|
|
|
| 61 |
model_obj=__model,
|
| 62 |
tokenizer_obj=__tokenizer)
|
| 63 |
|
| 64 |
+
# TOKENS #######################################################################
|
| 65 |
|
| 66 |
+
def compute_tokens(
|
| 67 |
+
prompt_str: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
) -> object:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
# do not download the model without the GPU wrapper
|
| 70 |
__tokenizer = fetch_tokenizer(device='cpu')
|
| 71 |
# fill all the arguments that cannot be pickled
|
| 72 |
+
return _app.update_tokens_state(
|
| 73 |
+
prompt_str=prompt_str,
|
| 74 |
+
tokenizer_obj=__tokenizer)
|
| 75 |
+
|
| 76 |
+
# INDICES ######################################################################
|
| 77 |
+
|
| 78 |
+
def compute_indices(
|
| 79 |
+
prompt_str: str,
|
|
|
|
| 80 |
) -> object:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
# do not download the model without the GPU wrapper
|
| 82 |
__tokenizer = fetch_tokenizer(device='cpu')
|
| 83 |
# fill all the arguments that cannot be pickled
|
| 84 |
+
return _app.update_indices_state(
|
| 85 |
+
prompt_str=prompt_str,
|
| 86 |
+
tokenizer_obj=__tokenizer)
|
| 87 |
+
|
| 88 |
+
# LOGITS #######################################################################
|
| 89 |
+
|
| 90 |
+
def compute_logits(
|
| 91 |
+
indices_arr: object,
|
|
|
|
|
|
|
|
|
|
| 92 |
) -> object:
|
| 93 |
+
# load the model and tokenizer inside the GPU wrapper
|
| 94 |
+
__model = fetch_model(device='cuda')
|
|
|
|
| 95 |
# fill all the arguments that cannot be pickled
|
| 96 |
+
return _app.update_logits_state(
|
| 97 |
+
indices_arr=indices_arr,
|
| 98 |
+
model_obj=__model)
|
|
|
|
|
|
|
| 99 |
|
| 100 |
# MAIN #########################################################################
|
| 101 |
|
| 102 |
+
demo = _app.create_app(partition=compute_tokens, convert=compute_indices, compute=compute_logits)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
demo.queue()
|
| 105 |
+
demo.launch(theme=gradio.themes.Soft(), css=psaiops.common.style.ALL, debug=True)
|