|
|
|
|
import contextlib
import os
import tempfile
import threading
import uuid

from dotenv import load_dotenv
from flask import Flask, render_template, request, Response, redirect, url_for, flash, jsonify

from gemini_processor import process_content
from reddit_scraper import scrape_reddit_user
|
|
|
|
|
# Load environment variables through python-dotenv before any setting
# below is read from the environment.
load_dotenv()

app = Flask(__name__)

# NOTE(review): when SECRET_KEY is unset or empty the app falls back to a
# hard-coded key, which makes signed session data (used by flash messages)
# forgeable. Confirm SECRET_KEY is always provided outside local development.
app.secret_key = os.environ.get("SECRET_KEY") or 'default_secret_key'

# In-memory registry of task state keyed by task id. It is written by the
# request handlers and by the background worker threads, and it does not
# survive a process restart.
tasks = {}
|
|
|
def background_task(username, task_id):
    """
    Scrape a Reddit user's data and process it through the Gemini API.

    This function is started in a worker thread by the index view. It reports
    progress and the final outcome by mutating ``tasks[task_id]``:

    * ``progress`` - human-readable description of the current step.
    * ``status`` - set to ``'Completed'`` or ``'Failed'`` when the task ends.
    * ``report_path`` - path of the generated report, set on success only.

    Args:
        username: Reddit username to scrape.
        task_id: Key of this task's entry in the module-level ``tasks`` dict.

    Returns:
        None. Exceptions raised while scraping or processing are logged with
        their traceback and the task is marked as failed instead of being
        propagated out of the thread.
    """
    def mark_failed(message):
        # Look the entry up defensively: if it is missing, failure reporting
        # must not raise a second exception and hide the original problem.
        entry = tasks.get(task_id)
        if entry is not None:
            entry['progress'] = message
            entry['status'] = 'Failed'

    try:
        tasks[task_id]['progress'] = 'Scraping Reddit data...'
        scraped_data = scrape_reddit_user(username, task_id, tasks)
        if not scraped_data:
            mark_failed('Failed to scrape Reddit data.')
            return

        tasks[task_id]['progress'] = 'Processing data through Gemini API...'
        structured_report_path = process_content(username, scraped_data, task_id, tasks)
        # A falsy path or a path that does not exist on disk both count as a
        # processing failure.
        if not structured_report_path or not os.path.exists(structured_report_path):
            mark_failed('Failed to process data with Gemini API.')
            return

        tasks[task_id]['progress'] = 'Report generated successfully.'
        tasks[task_id]['status'] = 'Completed'
        tasks[task_id]['report_path'] = structured_report_path

    except Exception:
        # Top-level boundary of the worker thread: record the full traceback
        # (a bare print of the message loses it) and surface a generic
        # failure to the client polling the task.
        app.logger.exception("Error in background task %s", task_id)
        mark_failed('An unexpected error occurred.')
|
|
|
def get_unique_task_id():
    """Return a new random task identifier as a 32-character hex string."""
    new_uuid = uuid.uuid4()
    return new_uuid.hex
|
|
|
@app.route('/', methods=['GET', 'POST'])
def index():
    """
    Show the username form on GET and start a scraping task on POST.

    A POST with a blank ``reddit_username`` flashes an error and redirects
    back to this page. Otherwise a new entry is registered in ``tasks``, a
    worker thread running ``background_task`` is started, and the client is
    redirected to the progress page for the new task.
    """
    if request.method != 'POST':
        return render_template('index.html')

    submitted_name = request.form.get('reddit_username', '').strip()
    if not submitted_name:
        flash('Please enter a Reddit username.', 'danger')
        return redirect(url_for('index'))

    # Register the task before the worker starts so the worker and the
    # status endpoint always find an entry to read and update.
    new_task_id = get_unique_task_id()
    initial_state = {
        'progress': 'Task started.',
        'status': 'In Progress',
        'report_path': None,
        'total_posts': 0,
        'scraped_posts': 0,
        'total_comments': 0,
        'scraped_comments': 0,
    }
    tasks[new_task_id] = initial_state

    worker = threading.Thread(
        target=background_task,
        args=(submitted_name, new_task_id),
    )
    worker.start()

    flash('Your request is being processed. Please wait...', 'info')
    return redirect(url_for('progress_page', task_id=new_task_id))
|
|
|
@app.route('/progress/<task_id>', methods=['GET'])
def progress_page(task_id):
    """
    Render the progress page for a known task.

    Unknown task ids flash an error and redirect to the index page.
    """
    if task_id in tasks:
        return render_template('progress.html', task_id=task_id)

    flash('Invalid task ID.', 'danger')
    return redirect(url_for('index'))
|
|
|
@app.route('/status/<task_id>', methods=['GET'])
def status(task_id):
    """
    Return the current state of a task as JSON.

    Responds with HTTP 404 and ``{'status': 'Invalid task ID.'}`` when the
    task id is unknown. Otherwise the response carries the task's status,
    progress message, and the four post/comment counters; a missing counter
    is reported as 0.
    """
    try:
        task = tasks[task_id]
    except KeyError:
        return jsonify({'status': 'Invalid task ID.'}), 404

    payload = {
        'status': task.get('status', 'Unknown'),
        'progress': task.get('progress', ''),
    }
    counter_names = ('total_posts', 'scraped_posts', 'total_comments', 'scraped_comments')
    for counter in counter_names:
        payload[counter] = task.get(counter, 0)

    return jsonify(payload)
|
|
|
@app.route('/download/<task_id>', methods=['GET'])
def download(task_id):
    """
    Stream the generated report for ``task_id`` as a file download.

    Redirects with a flashed message when the task id is unknown, when the
    task has not completed yet, or when the report file is missing.
    Otherwise the report is streamed in 4096-byte chunks with a
    ``text/markdown`` mimetype. Once the whole file has been sent, the report
    file is deleted and the task entry is removed from ``tasks``.
    """
    # Single lookup: a concurrent download that finishes between a membership
    # test and a later ``tasks[task_id]`` access would otherwise raise KeyError.
    task = tasks.get(task_id)
    if task is None:
        flash('Invalid task ID.', 'danger')
        return redirect(url_for('index'))
    if task['status'] != 'Completed':
        flash('Report is not ready yet.', 'warning')
        return redirect(url_for('progress_page', task_id=task_id))

    report_path = task['report_path']
    if not report_path or not os.path.exists(report_path):
        flash('Report file not found.', 'danger')
        return redirect(url_for('index'))

    def generate():
        with open(report_path, 'rb') as report_file:
            # iter() with a sentinel keeps reading until read() returns b''.
            yield from iter(lambda: report_file.read(4096), b'')

        # Reached only after the complete file has been streamed, so an
        # interrupted download leaves the report available for a retry.
        # Cleanup is idempotent because another request for the same task
        # may already have removed the file or the task entry.
        with contextlib.suppress(FileNotFoundError):
            os.remove(report_path)
        tasks.pop(task_id, None)

    return Response(generate(), mimetype='text/markdown', headers={
        'Content-Disposition': f'attachment; filename="{os.path.basename(report_path)}"'
    })
|
|
|
if __name__ == '__main__':
    # Start the server only when this file is executed directly, not when it
    # is imported as a module.
    # NOTE(review): debug=True turns on Flask's debug mode; confirm this
    # entry point is never used for a production deployment.
    app.run(debug=True)
|
|
|