Spaces:
Runtime error
Runtime error
Commit
·
58c2772
1
Parent(s):
950bf93
Upload 3 files
Browse files- app.py +13 -0
- requirements.txt +58 -0
- views.py +75 -0
app.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Entry point of the Flask application that serves the Reddit topic tool."""
import os

from flask import Flask, redirect, url_for

from views import views

# The whole UI lives in the `views` blueprint, mounted under /reddit-app.
app = Flask(__name__)
app.register_blueprint(views, url_prefix='/reddit-app')


@app.route('/', methods=['POST', 'GET'])
def reddit_app_home():
    """Redirect the bare root URL to the blueprint's home page.

    The previous body was a bare ``return``. Flask raises a TypeError when a
    view returns ``None``, so every request to ``/`` ended in a 500 error.

    Returns:
        A redirect response pointing at the blueprint's ``home`` endpoint.
    """
    # Build the endpoint from the blueprint's own name so this keeps working
    # regardless of how the blueprint is named.
    return redirect(url_for(f'{views.name}.home'))


if __name__ == '__main__':
    # The interactive debugger lets anyone who can reach the server execute
    # arbitrary code, and this server listens on every interface. Debug mode
    # is therefore opt-in through the FLASK_DEBUG environment variable.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug, port=8000, host='0.0.0.0')
|
requirements.txt
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-i https://pypi.org/simple
|
| 2 |
+
bertopic==0.13.0
|
| 3 |
+
certifi==2022.12.7 ; python_version >= '3.6'
|
| 4 |
+
charset-normalizer==3.0.1
|
| 5 |
+
click==8.1.3 ; python_version >= '3.7'
|
| 6 |
+
cython==0.29.33 ; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'
|
| 7 |
+
filelock==3.9.0 ; python_version >= '3.7'
|
| 8 |
+
flask==2.2.2
|
| 9 |
+
hdbscan==0.8.29
|
| 10 |
+
huggingface-hub==0.11.1 ; python_full_version >= '3.7.0'
|
| 11 |
+
idna==3.4 ; python_version >= '3.5'
|
| 12 |
+
importlib-metadata==6.0.0 ; python_version < '3.10'
|
| 13 |
+
itsdangerous==2.1.2 ; python_version >= '3.7'
|
| 14 |
+
jinja2==3.1.2 ; python_version >= '3.7'
|
| 15 |
+
joblib==1.2.0 ; python_version >= '3.7'
|
| 16 |
+
llvmlite==0.39.1 ; python_version >= '3.7'
|
| 17 |
+
markupsafe==2.1.1 ; python_version >= '3.7'
|
| 18 |
+
nltk==3.8.1 ; python_version >= '3.7'
|
| 19 |
+
numba==0.56.4 ; python_version >= '3.7'
|
| 20 |
+
numpy==1.23.5 ; python_version >= '3.8'
|
| 21 |
+
nvidia-cublas-cu11==11.10.3.66 ; platform_system == 'Linux'
|
| 22 |
+
nvidia-cuda-nvrtc-cu11==11.7.99 ; platform_system == 'Linux'
|
| 23 |
+
nvidia-cuda-runtime-cu11==11.7.99 ; platform_system == 'Linux'
|
| 24 |
+
nvidia-cudnn-cu11==8.5.0.96 ; platform_system == 'Linux'
|
| 25 |
+
packaging==23.0 ; python_version >= '3.7'
|
| 26 |
+
pandas==1.5.2
|
| 27 |
+
pillow==9.4.0 ; python_version >= '3.7'
|
| 28 |
+
plotly==5.12.0 ; python_version >= '3.6'
|
| 29 |
+
praw==7.6.1
|
| 30 |
+
prawcore==2.3.0 ; python_version ~= '3.6'
|
| 31 |
+
pynndescent==0.5.8
|
| 32 |
+
python-dateutil==2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
|
| 33 |
+
python-dotenv==0.21.0
|
| 34 |
+
pytz==2022.7.1
|
| 35 |
+
pyyaml==5.4.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
| 36 |
+
regex==2022.10.31 ; python_version >= '3.6'
|
| 37 |
+
requests==2.28.2 ; python_version >= '3.7' and python_version < '4'
|
| 38 |
+
scikit-learn==1.2.0 ; python_version >= '3.8'
|
| 39 |
+
scipy==1.10.0 ; python_version < '3.12' and python_version >= '3.8'
|
| 40 |
+
sentence-transformers==2.2.2 ; python_full_version >= '3.6.0'
|
| 41 |
+
sentencepiece==0.1.97
|
| 42 |
+
setuptools==66.0.0 ; python_version >= '3.7'
|
| 43 |
+
six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
|
| 44 |
+
tenacity==8.1.0 ; python_version >= '3.6'
|
| 45 |
+
threadpoolctl==3.1.0 ; python_version >= '3.6'
|
| 46 |
+
tokenizers==0.13.2
|
| 47 |
+
torch==1.13.1 ; python_full_version >= '3.7.0'
|
| 48 |
+
torchvision==0.14.1 ; python_version >= '3.7'
|
| 49 |
+
tqdm==4.64.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
| 50 |
+
transformers==4.25.1 ; python_full_version >= '3.7.0'
|
| 51 |
+
typing-extensions==4.4.0 ; python_version >= '3.7'
|
| 52 |
+
umap-learn==0.5.3
|
| 53 |
+
update-checker==0.18.0
|
| 54 |
+
urllib3==1.26.14 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
| 55 |
+
websocket-client==1.4.2 ; python_version >= '3.7'
|
| 56 |
+
werkzeug==2.2.2 ; python_version >= '3.7'
|
| 57 |
+
wheel==0.38.4 ; python_version >= '3.7'
|
| 58 |
+
zipp==3.11.0 ; python_version >= '3.7'
|
views.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import tempfile
|
| 4 |
+
from bertopic import BERTopic
|
| 5 |
+
from src.reddit import RedditBot
|
| 6 |
+
from flask import Blueprint, render_template, request, send_file, redirect, url_for, send_from_directory
|
| 7 |
+
|
| 8 |
+
# Directory, relative to the process working directory, where the generated
# CSV reports are written.
DOWNLOADS_PATH = os.path.join(os.getcwd(), 'downloads')

# Blueprint takes (name, import_name). The arguments were swapped, which only
# worked because this module is itself called "views"; imported under a dotted
# package path the blueprint name would contain a dot, which Flask rejects.
views = Blueprint('views', __name__)

# Module-level instances used by the request handlers below.
reddit = RedditBot()
topic_model = BERTopic()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def retrieve_subreddits(data: dict) -> pd.DataFrame:
    """Fetch posts of a subreddit and return them as a DataFrame.

    Args:
        data: Mapping of form values. The 'subreddit', 'type' and 'amount'
            keys are read; 'amount' is converted with ``int``.

    Returns:
        The frame built by ``reddit.convert_posts_to_df`` with an additional
        'Text' column holding ``"<Title>: <Content>"`` for every row.

    Raises:
        TypeError: If 'amount' is missing from ``data``.
        ValueError: If 'amount' is not an integer string.
    """
    # Retrieve the posts through the Reddit API wrapper.
    posts = reddit.get_subreddits_posts(
        name=data.get('subreddit'),
        type=data.get('type'),
        amount=int(data.get('amount')),
    )
    df = reddit.convert_posts_to_df(posts=posts)
    # Column-wise concatenation instead of a row-wise apply: same result for
    # string columns, and it still yields a proper (empty) column when no
    # posts were returned.
    df['Text'] = df['Title'] + ': ' + df['Content']
    return df
|
| 25 |
+
|
| 26 |
+
@views.route('/', methods=['POST', 'GET'])
def home():
    """Show the request form (GET) or run topic modelling on a subreddit (POST).

    On POST the form fields 'amount', 'type' and 'subreddit' are validated.
    Invalid input redirects to the matching error page. Otherwise the posts
    are fetched, topics are fitted, and two CSV files ('topics.csv' and
    'docs_with_topics_info.csv') are written to DOWNLOADS_PATH before
    redirecting to the success page.

    Returns:
        The rendered 'index.html' template for GET, or a redirect response
        for POST.
    """
    if request.method != 'POST':
        return render_template('index.html')

    data = request.form

    # A missing or non-numeric amount used to raise inside int() and surface
    # as a 500; treat it like any other invalid amount instead.
    try:
        amount = int(data.get('amount'))
    except (TypeError, ValueError):
        amount = None
    if amount is None or amount < 0 or amount > 1000:
        return redirect(url_for('views.error', type_of_error='amount'))
    if data.get('type') not in ('hot', 'new', 'rising', 'top'):
        return redirect(url_for('views.error', type_of_error='type'))
    if not reddit.subreddit_exists(data.get('subreddit')):
        return redirect(url_for('views.error', type_of_error='subreddit'))

    # Retrieve the subreddit posts.
    subreddits_df = retrieve_subreddits(data=data)

    # Topic modelling using BERTopic; only the fitted model state is needed.
    topic_model.fit_transform(subreddits_df.Text)
    topics_df = topic_model.get_topic_info()
    # One pass over the topics instead of a boolean-mask assignment per topic.
    topics_df['Top words'] = [
        str([word for word, _ in topic_model.get_topic(topic)])
        for topic in topics_df.Topic
    ]

    # The export directory is not guaranteed to exist on a fresh deployment.
    os.makedirs(DOWNLOADS_PATH, exist_ok=True)

    # Export the topics. The former send_from_directory() calls were removed:
    # their responses were discarded, so nothing was ever sent to the client.
    topics_df.to_csv(os.path.join(DOWNLOADS_PATH, 'topics.csv'), index=False)

    # Export the per-document topic information.
    docs_df = topic_model.get_document_info(subreddits_df.Text)
    docs_df.to_csv(
        os.path.join(DOWNLOADS_PATH, 'docs_with_topics_info.csv'),
        index=False,
    )
    return redirect(url_for('views.success'))
|
| 63 |
+
|
| 64 |
+
# '/succes' is the original, misspelled URL; it stays registered so existing
# links keep working. '/success' sits closest to the function and is therefore
# registered first, so url_for is expected to build the corrected URL.
@views.route('/succes', methods=['GET'])
@views.route('/success', methods=['GET'])
def success():
    """Render the 'success.html' confirmation page."""
    return render_template('success.html')
|
| 67 |
+
|
| 68 |
+
@views.route('/error/<type_of_error>', methods=['GET'])
def error(type_of_error: str):
    """Render the error page for a known validation failure.

    Args:
        type_of_error: Error key taken from the URL; one of 'amount', 'type'
            or 'subreddit'.

    Returns:
        The rendered 'error.html' template with a human-readable message.

    Raises:
        werkzeug.exceptions.NotFound: If ``type_of_error`` is not a known key.
            The previous if/elif chain fell through and returned ``None``,
            which Flask turns into a 500 error.
    """
    # Local import: the module-level flask import does not include abort.
    from flask import abort

    messages = {
        'amount': 'The amount is higher than 1000 or lower than 0',
        'type': 'The ordering is not within hot, rising, new, top',
        'subreddit': 'The subreddit does not exist',
    }
    message = messages.get(type_of_error)
    if message is None:
        abort(404)
    return render_template('error.html', type_of_error=message)
|