Spaces:

thugCodeNinja
/

ChatGPTtextdetction

Sleeping

App Files Community

thugCodeNinja committed on Apr 1, 2024

Commit

82271e0

verified ·

1 Parent(s): 6aaf7b2

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -14

app.py CHANGED Viewed

@@ -39,16 +39,16 @@ def process_text(input_text):
         except Exception as e:
             return {'error': str(e)}
     def get_article_text(url):
-    try:
-        response = requests.get(url)
-        if response.status_code == 200:
-            soup = BeautifulSoup(response.content, 'html.parser')
             # Extract text from the article content (you may need to adjust this based on the website's structure)
-            article_text = ' '.join([p.get_text() for p in soup.find_all('p')])
             return article_text
-    except Exception as e:
-        print(f"An error occurred: {e}")
-    return ''
     def find_plagiarism(text):
         search_results = search(text)
         if 'items' not in search_results:
@@ -68,12 +68,12 @@ def process_text(input_text):
                     embedding2 = model(**encoding2).last_hidden_state.mean(dim=1)
             # Calculate cosine similarity between the input text and the article text embeddings
-            similarity = cosine_similarity(embedding1, embedding2)[0][0]
-            similar_articles.append({'Link': link, 'Similarity': similarity})
-            similar_articles = sorted(similar_articles, key=lambda x: x['Similarity'], reverse=True)
-            threshold = 0.5  # Adjust the threshold as needed
-            similar_articles = [article for article in similar_articles if article['Similarity'] > threshold]
-            return similar_articles[:5]
     prediction = pipe([text])
     explainer = shap.Explainer(pipe)

         except Exception as e:
             return {'error': str(e)}
     def get_article_text(url):
+        try:
+            response = requests.get(url)
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.content, 'html.parser')
             # Extract text from the article content (you may need to adjust this based on the website's structure)
+                article_text = ' '.join([p.get_text() for p in soup.find_all('p')])
             return article_text
+        except Exception as e:
+            print(f"An error occurred: {e}")
+        return ''
     def find_plagiarism(text):
         search_results = search(text)
         if 'items' not in search_results:
                     embedding2 = model(**encoding2).last_hidden_state.mean(dim=1)
             # Calculate cosine similarity between the input text and the article text embeddings
+                similarity = cosine_similarity(embedding1, embedding2)[0][0]
+                similar_articles.append({'Link': link, 'Similarity': similarity})
+        similar_articles = sorted(similar_articles, key=lambda x: x['Similarity'], reverse=True)
+        threshold = 0.5  # Adjust the threshold as needed
+        similar_articles = [article for article in similar_articles if article['Similarity'] > threshold]
+        return similar_articles[:5]
     prediction = pipe([text])
     explainer = shap.Explainer(pipe)