Spaces:

thugCodeNinja
/

ChatGPTtextdetction

Sleeping

App Files Files Community

thugCodeNinja committed on Mar 31, 2024

Commit

b8985e3

verified ·

1 Parent(s): d2552db

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -9

app.py CHANGED Viewed

@@ -3,12 +3,13 @@ import torch
 from torch.nn.functional import softmax
 import shap
 import requests
-from transformers import RobertaTokenizer, pipeline, RobertaModel
 model_dir = 'temp'
 tokenizer = RobertaTokenizer.from_pretrained(model_dir)
-model = RobertaModel.from_pretrained(model_dir)
 def process_text(input_text, input_file):
     if input_text:
         text = input_text
@@ -43,17 +44,19 @@ def process_text(input_text, input_file):
         for item in search_results['items']:
             title = item.get('title', '')
             link = item.get('link', '')
-            similar_articles.append({'title': title, 'link': link})
         return similar_articles[:5]
-    pipe = pipeline('text-classification', model=model, tokenizer=tokenizer)
     prediction = pipe([text])
     explainer = shap.Explainer(pipe)
     shap_values = explainer([text])
-    text_plot = shap.plots.text(shap_values, display=True)
     similar_articles = find_plagiarism(text)
-    return processed_result, prob, final_label, text_plot,similar_articles
 text_input = gr.Textbox(label="Enter text")
 file_input = gr.File(label="Upload a text file")
@@ -62,4 +65,3 @@ title = "Group 2- ChatGPT text detection module"
 description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
 The probability is particularly explained by the attention plots through SHAP'''
 gr.Interface(fn=process_text,title=title,description=description, inputs=[text_input, file_input], outputs=outputs).launch()

 from torch.nn.functional import softmax
 import shap
 import requests
+from transformers import RobertaTokenizer,RobertaForSequenceClassification, pipeline
+from IPython.core.display import HTML
 model_dir = 'temp'
 tokenizer = RobertaTokenizer.from_pretrained(model_dir)
+model = RobertaForSequenceClassification.from_pretrained(model_dir)
+#pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
+pipe = pipeline("text-classification",model=model,tokenizer=tokenizer)
 def process_text(input_text, input_file):
     if input_text:
         text = input_text
         for item in search_results['items']:
             title = item.get('title', '')
             link = item.get('link', '')
+            similar_articles.append([ title,link])
         return similar_articles[:5]
     prediction = pipe([text])
     explainer = shap.Explainer(pipe)
     shap_values = explainer([text])
+    shap_plot_html = HTML(shap.plots.text(shap_values, display=False)).data
+    # HTML(shap.plots.text(shap_values, display=False))
+    # with open('rendered.html', 'w') as file:
+    #     file.write(shap.plots.text(shap_values, display=False))
     similar_articles = find_plagiarism(text)
+    return processed_result, prob, final_label, shap_plot_html,similar_articles
 text_input = gr.Textbox(label="Enter text")
 file_input = gr.File(label="Upload a text file")
 description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
 The probability is particularly explained by the attention plots through SHAP'''
 gr.Interface(fn=process_text,title=title,description=description, inputs=[text_input, file_input], outputs=outputs).launch()