File size: 5,282 Bytes
# app.py
from flask import Flask, render_template, request, Response, redirect, url_for, flash, jsonify
import os
import tempfile
from reddit_scraper import scrape_reddit_user
from gemini_processor import process_content
from dotenv import load_dotenv
import threading
import uuid
# Load environment variables (python-dotenv) before any os.getenv() call below.
load_dotenv()

app = Flask(__name__)

# Flask signs the session cookie -- which flash() relies on -- with this key.
# Never fall back to a constant that lives in the source tree: anyone who can
# read the code could then forge session data. When SECRET_KEY is unset (or
# empty) a random per-process key is generated instead. Sessions signed with
# a generated key do not survive a restart, so set SECRET_KEY in the
# environment for any real deployment.
app.secret_key = os.getenv("SECRET_KEY") or os.urandom(32).hex()

# In-memory registry of background jobs, keyed by task ID. Entries are created
# by the index view, mutated by background_task() from a worker thread, and
# removed by the download view. Being process-local, it is lost on restart and
# is not shared between multiple server processes.
tasks = {}
def background_task(username, task_id):
    """
    Run the scrape-then-process pipeline for a single request.

    Calls scrape_reddit_user() and then process_content(), recording
    human-readable progress and the final outcome in tasks[task_id] so the
    status endpoint can report on it.

    Parameters:
        username: Reddit username handed to the scraper and the processor.
        task_id: Key of the entry in the module-level ``tasks`` dictionary
            that this run updates.

    Returns:
        None. Results are communicated only through ``tasks[task_id]``:
        'status' becomes 'Completed' (with 'report_path' set) or 'Failed'.
    """

    def mark_failed(message):
        # Record the user-facing reason first, then flip the status flag.
        tasks[task_id]['progress'] = message
        tasks[task_id]['status'] = 'Failed'

    try:
        # Stage 1: collect the raw data.
        tasks[task_id]['progress'] = 'Scraping Reddit data...'
        scraped = scrape_reddit_user(username, task_id, tasks)
        if not scraped:
            mark_failed('Failed to scrape Reddit data.')
            return

        # Stage 2: turn the scraped data into a report file.
        tasks[task_id]['progress'] = 'Processing data through Gemini API...'
        report_file = process_content(username, scraped, task_id, tasks)
        # The processor must hand back a path that actually exists on disk.
        report_ready = bool(report_file) and os.path.exists(report_file)
        if not report_ready:
            mark_failed('Failed to process data with Gemini API.')
            return

        # Success: publish the report location for the download endpoint.
        tasks[task_id]['progress'] = 'Report generated successfully.'
        tasks[task_id]['status'] = 'Completed'
        tasks[task_id]['report_path'] = report_file
    except Exception as exc:
        # Top-level boundary of the worker thread: log and mark the task
        # failed rather than letting the thread die silently.
        print(f"Error in background task: {exc}")
        mark_failed('An unexpected error occurred.')
def get_unique_task_id():
    """
    Return a fresh task identifier.

    The identifier is the 32-character lowercase hexadecimal form of a
    random (version 4) UUID.
    """
    new_uuid = uuid.uuid4()
    return new_uuid.hex
@app.route('/', methods=['GET', 'POST'])
def index():
    """
    Landing page and job submission endpoint.

    GET renders the submission form (index.html). POST reads the
    'reddit_username' form field, registers a new entry in ``tasks``,
    starts background_task() on a separate thread and redirects the
    browser to the progress page for that task. An empty username is
    rejected with a flash message and a redirect back to this page.
    """
    if request.method != 'POST':
        return render_template('index.html')

    submitted_name = request.form.get('reddit_username', '').strip()
    if not submitted_name:
        flash('Please enter a Reddit username.', 'danger')
        return redirect(url_for('index'))

    # Register the task before the worker starts so that the worker (and
    # the progress/status endpoints) always find an entry to work with.
    new_task_id = get_unique_task_id()
    counter_keys = (
        'total_posts',
        'scraped_posts',
        'total_comments',
        'scraped_comments',
    )
    task_record = {
        'progress': 'Task started.',
        'status': 'In Progress',
        'report_path': None,
    }
    task_record.update(dict.fromkeys(counter_keys, 0))
    tasks[new_task_id] = task_record

    # Hand the long-running work to a separate thread so this request can
    # return immediately.
    worker = threading.Thread(
        target=background_task,
        args=(submitted_name, new_task_id),
    )
    worker.start()

    flash('Your request is being processed. Please wait...', 'info')
    return redirect(url_for('progress_page', task_id=new_task_id))
@app.route('/progress/<task_id>', methods=['GET'])
def progress_page(task_id):
    """
    Show the progress page for a known task.

    Renders progress.html with the task ID when the task is registered in
    ``tasks``; otherwise flashes an error and redirects to the index page.
    """
    if task_id in tasks:
        return render_template('progress.html', task_id=task_id)

    flash('Invalid task ID.', 'danger')
    return redirect(url_for('index'))
@app.route('/status/<task_id>', methods=['GET'])
def status(task_id):
    """
    Report the current state of a task as JSON.

    Responds with 404 and a short JSON body when the task ID is unknown.
    Otherwise returns the task's 'status' and 'progress' strings together
    with the four post/comment counters, each falling back to a default
    when the key is absent from the task record.
    """
    if task_id not in tasks:
        return jsonify({'status': 'Invalid task ID.'}), 404

    record = tasks[task_id]
    payload = {
        'status': record.get('status', 'Unknown'),
        'progress': record.get('progress', ''),
    }
    # Counters default to 0 when the record does not contain them.
    for counter in ('total_posts', 'scraped_posts',
                    'total_comments', 'scraped_comments'):
        payload[counter] = record.get(counter, 0)

    return jsonify(payload)
@app.route('/download/<task_id>', methods=['GET'])
def download(task_id):
    """
    Stream the generated report to the client, then discard it.

    Redirects with a flash message when the task ID is unknown, when the
    task has not reached the 'Completed' status, or when the report file is
    missing. Otherwise the file is streamed as a markdown attachment in
    4096-byte chunks; once the last chunk has been produced the file is
    deleted and the task is dropped from ``tasks``.
    """
    # Single lookup: the entry may be removed by another request between a
    # membership test and a subsequent index operation.
    task = tasks.get(task_id)
    if task is None:
        flash('Invalid task ID.', 'danger')
        return redirect(url_for('index'))

    if task.get('status') != 'Completed':
        flash('Report is not ready yet.', 'warning')
        return redirect(url_for('progress_page', task_id=task_id))

    report_path = task.get('report_path')
    if not report_path or not os.path.exists(report_path):
        flash('Report file not found.', 'danger')
        return redirect(url_for('index'))

    def generate():
        """Yield the report in chunks, then clean up the file and task."""
        with open(report_path, 'rb') as report_file:
            for chunk in iter(lambda: report_file.read(4096), b''):
                yield chunk

        # Clean-up is reached only after the whole file has been yielded,
        # so a download that is cut short leaves the report available for
        # another attempt. It must be idempotent: a duplicate request for
        # the same report may already have removed the file and the task,
        # and raising here would break a response whose headers and body
        # have already been sent.
        try:
            os.remove(report_path)
        except FileNotFoundError:
            pass
        tasks.pop(task_id, None)

    return Response(generate(), mimetype='text/markdown', headers={
        'Content-Disposition': f'attachment; filename="{os.path.basename(report_path)}"'
    })
if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets
    # anyone who can reach the server execute arbitrary Python. Keep it
    # opt-in: set FLASK_DEBUG=1 (or true/yes/on) during local development.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(debug=debug_enabled)
|