File size: 3,733 Bytes
945cec7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
from dotenv import load_dotenv
from transformers import pipeline
import feedparser
import json
from dateutil import parser
import re

# Load environment variables through python-dotenv before anything reads them.
load_dotenv()
# Load the sentiment classifier once at import time; the API routes below
# reuse this single pipeline instance for every request.
sentiment_analysis = pipeline(
    "sentiment-analysis", model="siebert/sentiment-roberta-large-english"
)
# Flask application; static assets come from the "dist" folder and are exposed
# without a URL prefix (static_url_path="").
app = Flask(__name__, static_url_path="", static_folder="dist")

# Apply flask_cors to the whole app with its default settings.
CORS(app)


@app.route("/")
def index():
    """Serve ``index.html`` from the app's static folder for the root URL."""
    entry_page = "index.html"
    return app.send_static_file(entry_page)


def _cache_path_for(feed_url):
    """Return the name of the JSON cache file used for *feed_url*.

    The name is derived from the URL exactly as before (scheme prefix, dots
    and slashes removed), and every remaining character that is not a plain
    file-name character is dropped as well. ``feed_url`` comes straight from
    the query string, so it must never be able to smuggle path separators,
    drive letters or similar into the file name.
    """
    stem = "".join(re.split(r"https://|\.|/", feed_url))
    stem = re.sub(r"[^A-Za-z0-9_-]", "", stem)
    return f"{stem}_cache.json"


@app.route("/api/news")
def get_news():
    """Return the entries of an RSS/Atom feed annotated with a sentiment score.

    Query parameters:
        feed_url: URL of the feed to analyse (required).

    Returns:
        A JSON object ``{"entries": [...], "last_update": ...}`` where every
        entry carries an extra ``"sentiment"`` value in the range -1..1
        (negative label -> negative score). Results are cached on disk per
        feed and reused until the feed reports a newer update time.
        Responds with HTTP 400 when ``feed_url`` is missing and HTTP 502 when
        no update time can be read from the feed.
    """
    feed_url = request.args.get("feed_url")
    if not feed_url:
        # Without this guard re.split() would raise TypeError on None.
        return jsonify({"error": "Missing 'feed_url' query parameter"}), 400

    cache_path = _cache_path_for(feed_url)

    try:
        feed_entries = get_feed(feed_url)
    except KeyError:
        # Raised when the parsed feed lacks the fields get_feed reads
        # (e.g. the URL is not a feed or the fetch failed).
        return (
            jsonify({"error": "Could not read an update time from the feed"}),
            502,
        )

    # A missing, unreadable or corrupt cache simply means "no cache yet".
    try:
        with open(cache_path) as file:
            cache = json.load(file)
    except (OSError, ValueError):
        cache = {}
    if not isinstance(cache, dict):
        cache = {}

    new_stamp = feed_entries["last_update"]
    old_stamp = cache.get("last_update")
    print("new date", new_stamp)
    print("old date", old_stamp if "last_update" in cache else "None")

    # The cache is stale unless both timestamps parse and the feed is not
    # newer than the cached copy. Any parsing/comparison problem (bad date
    # string, naive vs. aware datetimes) is treated as stale so the request
    # still succeeds.
    is_stale = True
    if cache and old_stamp is not None:
        try:
            is_stale = parser.parse(new_stamp) > parser.parse(old_stamp)
        except (ValueError, OverflowError, TypeError):
            is_stale = True

    if not is_stale:
        print("Returning cached predictions")
        return jsonify(cache)

    print("Updating cache with new preditions")
    entries = feed_entries["entries"]
    # Only the titles are fed to the classifier.
    titles = [entry["title"] for entry in entries]
    raw_predictions = [sentiment_analysis(title) for title in titles]
    # Fold label + confidence into a single signed score in -1..1.
    scores = [
        -result[0]["score"] if result[0]["label"] == "NEGATIVE" else result[0]["score"]
        for result in raw_predictions
    ]
    # Merge the feed data with the predictions.
    output = {
        "entries": [
            {**entry, "sentiment": score} for entry, score in zip(entries, scores)
        ],
        "last_update": new_stamp,
    }
    # Persisting the cache is best-effort: the predictions are already
    # computed, so a read-only or full disk must not fail the request.
    try:
        with open(cache_path, "w") as file:
            json.dump(output, file)
    except OSError as error:
        print("Could not write cache file", cache_path, error)
    return jsonify(output)


@app.route("/api/predict", methods=["POST"])
def predict():
    """Score the sentiment of the sentences sent in the POST body.

    Expected JSON body: ``{"sentences": [...]}`` where each item is passed
    as-is to the sentiment pipeline (presumably plain strings). A single
    string is accepted too and treated as a one-element list.

    Returns:
        A JSON list of ``{"sentence": ..., "sentiment": ...}`` objects with
        the sentiment normalised to -1..1 (negative label -> negative score).
        Responds with HTTP 400 and ``{"error": ...}`` when the body is not a
        JSON object, has no ``"sentences"`` key, or ``"sentences"`` is not a
        list.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so every bad request ends up in the same error branch below.
    data = request.get_json(silent=True)
    sentences = data.get("sentences") if isinstance(data, dict) else None
    if sentences is None:
        return jsonify({"error": "No text provided"}), 400
    if isinstance(sentences, str):
        # Iterating a bare string would classify it character by character.
        sentences = [sentences]
    if not isinstance(sentences, list):
        return jsonify({"error": "'sentences' must be a list"}), 400

    # Predict sentiments, one pipeline call per sentence.
    raw_predictions = [sentiment_analysis(sentence) for sentence in sentences]
    # Fold label + confidence into a single signed score in -1..1.
    scores = [
        -result[0]["score"] if result[0]["label"] == "NEGATIVE" else result[0]["score"]
        for result in raw_predictions
    ]
    output = [
        dict(sentence=sentence, sentiment=score)
        for sentence, score in zip(sentences, scores)
    ]
    return jsonify(output)

def get_feed(feed_url):
    """Fetch and parse a feed, returning its entries and last update time.

    Args:
        feed_url: URL (or anything else ``feedparser.parse`` accepts) of the
            feed to read.

    Returns:
        ``{"entries": <list of parsed entries>, "last_update": <str>}``.
        ``last_update`` is the feed-level ``updated`` value when present,
        otherwise the feed-level ``published`` value. If the feed exposes
        neither, the current UTC time in ISO 8601 format is used so that
        callers comparing timestamps treat the feed as freshly updated
        instead of failing with ``KeyError``.
    """
    # Local import: only needed for the fallback timestamp below.
    from datetime import datetime, timezone

    feed = feedparser.parse(feed_url)
    channel = feed["feed"]
    last_update = channel.get("updated") or channel.get("published")
    if not last_update:
        # Many feeds omit a channel-level date; failed fetches do as well.
        last_update = datetime.now(timezone.utc).isoformat()
    return {"entries": feed["entries"], "last_update": last_update}


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))