File size: 2,675 Bytes
3c23ea3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import requests
from bs4 import BeautifulSoup
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import pipeline

# Flask application object; CORS(app) applies flask_cors to it.
app = Flask(__name__)
CORS(app)

# Bind the name before attempting the load so that a failed load leaves
# `classifier` as None instead of undefined. Code that uses the model can
# then test for None rather than tripping over a NameError.
classifier = None

print("Initializing AI... Downloading robust RoBERTa model. Patience.")
try:
    # Upgrading to a highly generalized, multi-source fine-tuned model
    # to severely reduce formatting and keyword bias.
    classifier = pipeline("text-classification", model="hamzab/roberta-fake-news-classification")
    print("Model loaded successfully. Server is ready.")
except Exception as e:
    # Best-effort startup: the error is reported and the module keeps
    # loading, so the routes below are still registered.
    print(f"Error loading model: {e}")


def extract_text_from_url(url):
    """Fetch *url* and return the text of its ``<p>`` elements joined by spaces.

    The request is sent with a browser-like User-Agent and a 10 second
    timeout. Returns the whitespace-stripped text, or ``None`` when any
    exception occurs while fetching or parsing (the exception is swallowed,
    so callers only see ``None``).
    """
    try:
        page = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=10,
        )
        # Treat HTTP error statuses as failures as well.
        page.raise_for_status()
        document = BeautifulSoup(page.content, 'html.parser')
        pieces = []
        for node in document.find_all('p'):
            pieces.append(node.get_text())
        joined = ' '.join(pieces)
    except Exception:
        return None
    return joined.strip()


@app.route('/health', methods=['GET'])
def health_check():
    """Liveness endpoint: always answers 200 with ``{"status": "online"}``."""
    payload = {"status": "online"}
    return jsonify(payload), 200


@app.route('/analyze', methods=['POST'])
def analyze_content():
    """Classify submitted text, or text scraped from a URL, as fake or real.

    Expects a JSON object containing either a ``url`` key (page to scrape)
    or a ``text`` key (raw content). ``url`` takes precedence when both are
    present.

    Returns a ``(response, status)`` pair:
      * 200 with ``verdict`` ("fake", "real" or "inconclusive"),
        ``confidence`` (model score as a rounded percentage) and
        ``text_preview`` (first 150 characters of the content).
      * 400 when the payload is missing or malformed, the URL yields no
        text, or the content is too short.
      * 503 when the model is not available (it failed to load at startup).
      * 500 with the exception message when inference raises.
    """
    # silent=True makes unparsable / non-JSON bodies come back as None so the
    # client gets this handler's JSON error instead of Flask's default error.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "No payload provided."}), 400
    # A JSON string or number is valid JSON but not a usable payload; without
    # this check `'url' in "url"` would pass and `data['url']` would crash.
    if not isinstance(data, dict):
        return jsonify({"error": "Invalid payload format."}), 400

    if 'url' in data:
        content = extract_text_from_url(data['url'])
        if not content:
            return jsonify({"error": "Could not extract text from the provided URL."}), 400
    elif 'text' in data:
        content = data['text']
        # Reject non-string text (null, numbers, lists) before len()/slicing.
        if not isinstance(content, str):
            return jsonify({"error": "Invalid payload format."}), 400
    else:
        return jsonify({"error": "Invalid payload format."}), 400

    # Measure the stripped length so whitespace-only input is not analysed.
    if len(content.strip()) < 15:
        return jsonify({"error": "Content too short for accurate analysis."}), 400

    # Look the model up defensively: if loading failed at import time the
    # module-level name may be unbound or None.
    model = globals().get("classifier")
    if model is None:
        return jsonify({"error": "Model is not available."}), 503

    # Cheap character-level cut to bound the work handed to the tokenizer.
    truncated_content = content[:2500]

    try:
        # Run inference. 2500 characters can still exceed the model's token
        # limit, so ask the tokenizer to truncate instead of letting the
        # forward pass fail on an over-long sequence.
        prediction = model(truncated_content, truncation=True)[0]
        label = prediction['label'].lower()
        confidence = round(prediction['score'] * 100)

        # Label convention per the original author: LABEL_0 is Fake,
        # LABEL_1 is Real. Textual labels are matched as well.
        if "0" in label or "fake" in label:
            verdict = "fake"
        elif "1" in label or "real" in label or "true" in label:
            verdict = "real"
        else:
            verdict = "inconclusive"

        return jsonify({
            "verdict": verdict,
            "confidence": confidence,
            "text_preview": (content[:150] + "...") if len(content) > 150 else content,
        }), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    # Script entry point: serve on every network interface, port 7860.
    app.run(
        host='0.0.0.0',
        port=7860,
    )