Update app.py
app.py CHANGED
@@ -11,8 +11,6 @@ model_dir = 'temp'
 tokenizer = RobertaTokenizer.from_pretrained(model_dir)
 model = RobertaForSequenceClassification.from_pretrained(model_dir)
 #pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
-tokenizer1 = RobertaTokenizer.from_pretrained('roberta-base')
-model1 = RobertaModel.from_pretrained('roberta-base')
 pipe = pipeline("text-classification",model=model,tokenizer=tokenizer)
 def process_text(input_text):
     if input_text:
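This hunk drops the separate `roberta-base` tokenizer and encoder (`tokenizer1`/`model1`) and routes everything through the fine-tuned checkpoint in `model_dir`. For reference, a minimal runnable sketch of the loading pattern that remains; the import lines are not visible in the diff, so they are assumptions here:

```python
# A minimal sketch of the loading pattern this hunk keeps. The imports
# are not shown in the diff, and model_dir ('temp') is assumed to hold
# a RoBERTa checkpoint fine-tuned for sequence classification.
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizer,
    pipeline,
)

model_dir = 'temp'
tokenizer = RobertaTokenizer.from_pretrained(model_dir)
model = RobertaForSequenceClassification.from_pretrained(model_dir)

# One pipeline serves all classification calls; it returns
# [{'label': ..., 'score': ...}] for each input string.
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(pipe(["Example input to classify"]))
```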
@@ -64,15 +62,15 @@ def process_text(input_text):
 
             # Calculate embeddings using the model
             with torch.no_grad():
-                embedding1 = model1(**encoding1).last_hidden_state.mean(dim=1)
-                embedding2 = model1(**encoding2).last_hidden_state.mean(dim=1)
+                embedding1 = model(**encoding1).last_hidden_state.mean(dim=1)
+                embedding2 = model(**encoding2).last_hidden_state.mean(dim=1)
 
             # Calculate cosine similarity between the input text and the article text embeddings
             similarity = cosine_similarity(embedding1, embedding2)[0][0]
-
+            if similarity > threshold:
+                similar_articles.append({'Link': link, 'Similarity': similarity})
         similar_articles = sorted(similar_articles, key=lambda x: x['Similarity'], reverse=True)
         threshold = 0.5 # Adjust the threshold as needed
-        similar_articles = [article for article in similar_articles if article['Similarity'] > threshold]
         return similar_articles[:5]
 
     prediction = pipe([text])
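Two details in the new lines are worth flagging. `model` is the `RobertaForSequenceClassification` loaded above, and its output object carries `logits` but no `last_hidden_state` (the removed `model1`, a base `RobertaModel`, did expose it), so `model(**encoding1).last_hidden_state` would fail unless hidden states are requested explicitly. Separately, `threshold` is now read inside the loop but assigned only after it, so the first comparison raises an `UnboundLocalError` unless `threshold` is also defined earlier in the function. Below is a minimal sketch of the similarity step with those two points addressed, using a base encoder for embeddings and setting the threshold up front; the `articles` dict and `embed` helper are hypothetical stand-ins for code outside this hunk:

```python
# A sketch of the similarity step, assuming a base RobertaModel encoder
# (the classification model's output has no last_hidden_state) and a
# threshold defined before its first read. `articles` and `embed` are
# hypothetical stand-ins for the article-fetching code outside this hunk.
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import RobertaModel, RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
encoder = RobertaModel.from_pretrained("roberta-base")

def embed(text: str) -> torch.Tensor:
    # Tokenize, run the encoder, and mean-pool the token embeddings
    # into a single (1, hidden_size) vector.
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        return encoder(**encoding).last_hidden_state.mean(dim=1)

threshold = 0.5  # set before the loop that reads it
similar_articles = []
articles = {"https://example.com/a": "Candidate article text..."}  # hypothetical

embedding1 = embed("Input text being checked")
for link, article_text in articles.items():
    embedding2 = embed(article_text)
    similarity = cosine_similarity(embedding1, embedding2)[0][0]
    if similarity > threshold:
        similar_articles.append({'Link': link, 'Similarity': similarity})

similar_articles = sorted(similar_articles, key=lambda x: x['Similarity'], reverse=True)
print(similar_articles[:5])
```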
@@ -84,7 +82,7 @@ def process_text(input_text):
     return processed_result, prob, final_label, shap_plot_html,similar_articles
 
 text_input = gr.Textbox(label="Enter text")
-outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability"), gr.Textbox(label="Label"), gr.HTML(label="SHAP Plot"),gr.Dataframe(label="Similar Articles", headers=["
+outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability"), gr.Textbox(label="Label"), gr.HTML(label="SHAP Plot"),gr.Dataframe(label="Similar Articles", headers=["Link", "Similarity"],row_count=5)]
 title = "Group 2- ChatGPT text detection module"
 description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification. Once the classifications are done, the decision is explained through the SHAP text plot.
 The probability is particularly explained by the attention plots through SHAP'''
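The components above feed a Gradio interface whose construction sits outside this diff. A hypothetical wiring, using Gradio's standard `gr.Interface` arguments; `process_text` returns five values, matching the five output components in `outputs`:

```python
# Hypothetical wiring of the pieces defined above; the actual
# gr.Interface call is not part of this diff. process_text returns
# five values, one per output component (three Textboxes, one HTML
# plot, one Dataframe of similar articles).
import gradio as gr

demo = gr.Interface(
    fn=process_text,
    inputs=text_input,
    outputs=outputs,
    title=title,
    description=description,
)

if __name__ == "__main__":
    demo.launch()
```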