Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,12 +3,13 @@ import torch
|
|
| 3 |
from torch.nn.functional import softmax
|
| 4 |
import shap
|
| 5 |
import requests
|
| 6 |
-
from transformers import RobertaTokenizer, pipeline
|
|
|
|
| 7 |
model_dir = 'temp'
|
| 8 |
tokenizer = RobertaTokenizer.from_pretrained(model_dir)
|
| 9 |
-
model =
|
| 10 |
-
|
| 11 |
-
|
| 12 |
def process_text(input_text, input_file):
|
| 13 |
if input_text:
|
| 14 |
text = input_text
|
|
@@ -43,17 +44,19 @@ def process_text(input_text, input_file):
|
|
| 43 |
for item in search_results['items']:
|
| 44 |
title = item.get('title', '')
|
| 45 |
link = item.get('link', '')
|
| 46 |
-
similar_articles.append(
|
| 47 |
return similar_articles[:5]
|
| 48 |
|
| 49 |
-
pipe = pipeline('text-classification', model=model, tokenizer=tokenizer)
|
| 50 |
prediction = pipe([text])
|
| 51 |
explainer = shap.Explainer(pipe)
|
| 52 |
shap_values = explainer([text])
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
similar_articles = find_plagiarism(text)
|
| 55 |
|
| 56 |
-
return processed_result, prob, final_label,
|
| 57 |
|
| 58 |
text_input = gr.Textbox(label="Enter text")
|
| 59 |
file_input = gr.File(label="Upload a text file")
|
|
@@ -62,4 +65,3 @@ title = "Group 2- ChatGPT text detection module"
|
|
| 62 |
description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
|
| 63 |
The probability is particularly explained by the attention plots through SHAP'''
|
| 64 |
gr.Interface(fn=process_text,title=title,description=description, inputs=[text_input, file_input], outputs=outputs).launch()
|
| 65 |
-
|
|
|
|
| 3 |
from torch.nn.functional import softmax
|
| 4 |
import shap
|
| 5 |
import requests
|
| 6 |
+
from transformers import RobertaTokenizer,RobertaForSequenceClassification, pipeline
|
| 7 |
+
from IPython.core.display import HTML
|
| 8 |
model_dir = 'temp'
|
| 9 |
tokenizer = RobertaTokenizer.from_pretrained(model_dir)
|
| 10 |
+
model = RobertaForSequenceClassification.from_pretrained(model_dir)
|
| 11 |
+
#pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
|
| 12 |
+
pipe = pipeline("text-classification",model=model,tokenizer=tokenizer)
|
| 13 |
def process_text(input_text, input_file):
|
| 14 |
if input_text:
|
| 15 |
text = input_text
|
|
|
|
| 44 |
for item in search_results['items']:
|
| 45 |
title = item.get('title', '')
|
| 46 |
link = item.get('link', '')
|
| 47 |
+
similar_articles.append([ title,link])
|
| 48 |
return similar_articles[:5]
|
| 49 |
|
|
|
|
| 50 |
prediction = pipe([text])
|
| 51 |
explainer = shap.Explainer(pipe)
|
| 52 |
shap_values = explainer([text])
|
| 53 |
+
shap_plot_html = HTML(shap.plots.text(shap_values, display=False)).data
|
| 54 |
+
# HTML(shap.plots.text(shap_values, display=False))
|
| 55 |
+
# with open('rendered.html', 'w') as file:
|
| 56 |
+
# file.write(shap.plots.text(shap_values, display=False))
|
| 57 |
similar_articles = find_plagiarism(text)
|
| 58 |
|
| 59 |
+
return processed_result, prob, final_label, shap_plot_html,similar_articles
|
| 60 |
|
| 61 |
text_input = gr.Textbox(label="Enter text")
|
| 62 |
file_input = gr.File(label="Upload a text file")
|
|
|
|
| 65 |
description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
|
| 66 |
The probability is particularly explained by the attention plots through SHAP'''
|
| 67 |
gr.Interface(fn=process_text,title=title,description=description, inputs=[text_input, file_input], outputs=outputs).launch()
|
|
|