Spaces:

OsBaran
/

2204AFakeNewsDetection

Build error

App Files Files Community

OsBaran committed on Nov 12, 2024

Commit

dc61da1

1 Parent(s): 53c07ae

Add application

Browse files

Files changed (1) hide show

app.py +81 -48

app.py CHANGED Viewed

@@ -13,6 +13,10 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 from keybert import KeyBERT
 import torch
 # Buraya İngilizce modelinizi yazın
 model = AutoModelForSequenceClassification.from_pretrained("OsBaran/Roberta-Classification-Model")
 tokenizer = AutoTokenizer.from_pretrained("roberta-base")
@@ -140,71 +144,100 @@ def sbert_similarity(input_text, bbc_articles):
     # En yüksek benzerlik skoru ve karşılık gelen haber
     max_score, most_similar_news = cosine_scores.max(), bbc_articles[cosine_scores.argmax().item()]
     print(f"En benzer haber skoru: {max_score:.2f}")
 # Türkçe modelini yükle
-model_tr_name = "dbmdz/bert-base-turkish-cased"  # Buraya Türkçe modelinizi yazın
-model_tr = AutoModelForSequenceClassification.from_pretrained(model_tr_name)
-tokenizer_tr = AutoTokenizer.from_pretrained(model_tr_name)
-classifier_tr = pipeline("sentiment-analysis", model=model_tr, tokenizer=tokenizer_tr)
-# Gradio ile API oluştur
-def predict(input_news, language):
-    if language == "en":
-        keywords = extract_keywords_keybert(input_news)
-        search_query = ' '.join(keywords)
-        news_articles = fetch_news_from_api(api_key, search_query)
-        trusted_sources = [
             "bbc news",
             "cnn",
             "reuters.com",
             "theguardian.com",
             "time",
             # Diğer güvenilir kaynaklar...
-        ]
-        trusted_articles = filter_trusted_sources(news_articles, trusted_sources)
         # # Sonuçları yazdır
-        trusted_articles_urls = []
-        for i in trusted_articles:
-            trusted_articles_urls.append(i["url"])
-        if trusted_articles:
-            print(f"\nGüvenilir kaynaklardan bulunan haberler:\n")
-            print(trusted_articles_urls)
-            bbc_articles = [fetch_news_content(link) for link in trusted_articles_urls]
-            similarities = compare_with_thrusted(input_news, bbc_articles)
-            sbert_similarity(input_news, bbc_articles)
-            print(similarities)
-            max_similarity = max(similarities)
-            threshold = 0.8
-            if max_similarity > threshold:
-                print(f"Sonuç: Doğru (Benzerlik: {max_similarity:.2f})")
-            else:
-                # Benzerlik bulunmazsa tahmin algoritmasını kullanın ve açıklama sağlayın
-                prediction = predict_with_roberta(model, tokenizer, input_news)
-                explanation = explain_roberta_prediction(model, tokenizer, input_news)
-                # Tahmin sonucunu yazdır
-                # result = "Doğru" if prediction == 1 else "Yanlış"
-                # print(f"Haberin durumu: {result}")
-                print(explanation)
-                return {explanation}
         else:
-            print("Güvenilir kaynaklardan hiç haber bulunamadı.")
             prediction = predict_with_roberta(model, tokenizer, input_news)
             explanation = explain_roberta_prediction(model, tokenizer, input_news)
             # Tahmin sonucunu yazdır
-            result = "Doğru" if prediction == 1 else "Yanlış"
-            print(f"Haberin durumu: {result}")
-            print("Haberin açıklaması:")
             print(explanation)
-            return {explanation}
     elif language == "tr":
-        result = classifier_tr(text)
     else:
         result = {"error": "Unsupported language"}
     # return result

import os

import torch
import torch.nn.functional as F
from deep_translator import DeeplTranslator
from keybert import KeyBERT
from sklearn.metrics.pairwise import cosine_similarity
# DeepL credential used by translate_text(). Prefer the DEEPL_API_KEY
# environment variable (e.g. a Space secret) so the key can be supplied and
# rotated without editing the source.
# NOTE(review): the fallback literal is a live credential committed to the
# repository - rotate it and drop the fallback once the variable is configured.
# NOTE(review): api_key is also passed to fetch_news_from_api() in
# enModelPredictAlgo; confirm both services are really meant to share one key.
api_key = os.environ.get("DEEPL_API_KEY", "69f73328-5f95-4eda-813a-16af8c688404:fx")

# English classifier: the OsBaran/Roberta-Classification-Model checkpoint,
# paired with the stock roberta-base tokenizer.
model = AutoModelForSequenceClassification.from_pretrained("OsBaran/Roberta-Classification-Model")
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
     # En yüksek benzerlik skoru ve karşılık gelen haber
     max_score, most_similar_news = cosine_scores.max(), bbc_articles[cosine_scores.argmax().item()]
     print(f"En benzer haber skoru: {max_score:.2f}")
def translate_text(text, source_lang='tr', target_lang='en'):
    """Translate *text* with DeepL and return the result.

    Args:
        text: Text to translate.
        source_lang: Language code of *text* (defaults to Turkish).
        target_lang: Language code to translate into (defaults to English).

    Returns:
        Whatever ``DeeplTranslator.translate`` returns for *text*.
    """
    # The module-level api_key authenticates the DeepL request.
    translator = DeeplTranslator(
        api_key=api_key,
        source=source_lang,
        target=target_lang,
    )
    return translator.translate(text)
 # Türkçe modelini yükle
+# model_tr_name = "dbmdz/bert-base-turkish-cased"  # Buraya Türkçe modelinizi yazın
+# model_tr = AutoModelForSequenceClassification.from_pretrained(model_tr_name)
+# tokenizer_tr = AutoTokenizer.from_pretrained(model_tr_name)
+# classifier_tr = pipeline("sentiment-analysis", model=model_tr, tokenizer=tokenizer_tr)
+tokenizer_tr = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
+model_tr = AutoModelForSequenceClassification.from_pretrained("OsBaran/Bert-Classification-Model-Tr-3", num_labels=2)
def trModelPredictAlgo(input_news):
    """Classify a Turkish news text with the Turkish model.

    Args:
        input_news: The news text to classify (a single string).

    Returns:
        A summary string holding the predicted class index and the softmax
        probability of that class as a percentage.
    """
    # Use the Turkish tokenizer/model pair. The English `tokenizer`/`model`
    # pair must not be used here: it would score Turkish text with the
    # RoBERTa classifier and leave `model_tr` unused.
    inputs = tokenizer_tr(input_news, return_tensors="pt", padding=True, truncation=True, max_length=512)
    # Send the tensors to wherever the model actually lives, so this does not
    # depend on a separate global `device` staying in sync with the model.
    inputs = {key: value.to(model_tr.device) for key, value in inputs.items()}

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model_tr(**inputs).logits

    # Softmax over the class dimension; [0] selects the single input's row.
    probabilities = F.softmax(logits, dim=-1)[0]
    predicted_class = int(torch.argmax(probabilities))
    predicted_probability = probabilities[predicted_class].item()

    class_line = f"Predicted class: {predicted_class}"
    probability_line = f"Prediction probability: {predicted_probability * 100:.2f}%"
    print(class_line)
    print(probability_line)
    # Separate the two fields; they were previously concatenated back to back.
    return f"{class_line}, {probability_line}"
def _roberta_verdict(input_news):
    """Classify *input_news* with RoBERTa, log the verdict, return the explanation."""
    prediction = predict_with_roberta(model, tokenizer, input_news)
    explanation = explain_roberta_prediction(model, tokenizer, input_news)
    result = "Doğru" if prediction == 1 else "Yanlış"
    print(f"Haberin durumu: {result}")
    print("Haberin açıklaması:")
    print(explanation)
    return explanation


def enModelPredictAlgo(input_news):
    """Check an English news text against trusted sources, then the classifier.

    Keywords extracted from the text are used to search for news articles,
    which are filtered down to a list of trusted sources. If one of those
    articles is similar enough to the input, the text is reported as true;
    otherwise (or when no trusted article is found) the RoBERTa classifier's
    explanation is returned.

    Args:
        input_news: The English news text to check.

    Returns:
        The similarity verdict string when a trusted article scores above the
        threshold, otherwise the value of ``explain_roberta_prediction``.
    """
    keywords = extract_keywords_keybert(input_news)
    search_query = ' '.join(keywords)
    # NOTE(review): api_key is the same module-level name translate_text hands
    # to DeepL - confirm the news API is meant to receive that key.
    news_articles = fetch_news_from_api(api_key, search_query)
    trusted_sources = [
        "bbc news",
        "cnn",
        "reuters.com",
        "theguardian.com",
        "time",
        # Other trusted sources go here.
    ]
    trusted_articles = filter_trusted_sources(news_articles, trusted_sources)

    if not trusted_articles:
        print("Güvenilir kaynaklardan hiç haber bulunamadı.")
        return _roberta_verdict(input_news)

    trusted_articles_urls = [article["url"] for article in trusted_articles]
    print("\nGüvenilir kaynaklardan bulunan haberler:\n")
    print(trusted_articles_urls)

    bbc_articles = [fetch_news_content(link) for link in trusted_articles_urls]
    similarities = compare_with_thrusted(input_news, bbc_articles)
    # Called for its logging; its return value is not used here.
    sbert_similarity(input_news, bbc_articles)
    print(similarities)

    # default=0.0 sends an empty score list to the classifier fallback
    # instead of raising ValueError.
    max_similarity = max(similarities, default=0.0)
    threshold = 0.8
    if max_similarity > threshold:
        # Return the verdict as well as logging it, so the caller does not
        # receive None on the "confirmed by a trusted source" path.
        verdict = f"Sonuç: Doğru (Benzerlik: {max_similarity:.2f})"
        print(verdict)
        return verdict

    # No trusted article is similar enough: fall back to the classifier.
    return _roberta_verdict(input_news)
# Entry point for the Gradio API.
def predict(input_news, language):
    """Route a news text to the model(s) for the given language.

    Args:
        input_news: The news text to check.
        language: ``"en"`` or ``"tr"``.

    Returns:
        A dict. For ``"en"``: ``{"Sonuç": ...}`` with the English result.
        For ``"tr"``: the English-model result on the translated text and the
        Turkish-model result on the original text. For any other language:
        ``{"error": "Unsupported language"}``.
    """
    if language == "en":
        result = enModelPredictAlgo(input_news=input_news)
        return {"Sonuç": result}

    if language == "tr":
        # Turkish input is checked twice: translated to English for the
        # English pipeline, and as-is by the Turkish model.
        input_news_en = translate_text(input_news)
        result1 = enModelPredictAlgo(input_news_en)
        result2 = trModelPredictAlgo(input_news=input_news)
        return {"İngilizce Model Sonucu": result1, "Türkçe Model Sonucu": result2}

    # Return the error payload; building it without returning it made the
    # function yield None for unsupported languages.
    return {"error": "Unsupported language"}