File size: 5,282 Bytes
06f2cdc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# app.py

from flask import Flask, render_template, request, Response, redirect, url_for, flash, jsonify
import os
import tempfile
from reddit_scraper import scrape_reddit_user
from gemini_processor import process_content
from dotenv import load_dotenv
import threading
import uuid

# Load environment variables (python-dotenv; presumably from a local .env file)
load_dotenv()

app = Flask(__name__)
# NOTE(review): when SECRET_KEY is unset or empty this falls back to a
# hard-coded, publicly known key. Set SECRET_KEY in the environment for any
# real deployment.
app.secret_key = os.getenv("SECRET_KEY") or 'default_secret_key'  # Replace with a strong secret key

# Global dictionary to track tasks: maps task ID -> dict holding 'progress',
# 'status', 'report_path' and the post/comment counters (created in index()).
# It is written by request handlers and by background threads; no lock is used.
tasks = {}

def background_task(username, task_id):
    """
    Background task to scrape Reddit data and process it through Gemini API.

    Updates the entry for ``task_id`` in the global ``tasks`` dictionary as it
    goes: ``progress`` carries a human-readable message, ``status`` ends up as
    ``'Completed'`` or ``'Failed'``, and ``report_path`` is set on success.

    Args:
        username: Reddit username forwarded to ``scrape_reddit_user`` and
            ``process_content``.
        task_id: Key of an existing entry in ``tasks``.

    Raises:
        KeyError: If ``task_id`` has no entry in ``tasks``.
    """
    # Hold on to the entry itself so that later updates cannot fail with a
    # KeyError if the entry is removed from ``tasks`` while we are running.
    task = tasks[task_id]
    try:
        task['progress'] = 'Scraping Reddit data...'
        scraped_data = scrape_reddit_user(username, task_id, tasks)
        if not scraped_data:
            task['progress'] = 'Failed to scrape Reddit data.'
            task['status'] = 'Failed'
            return

        task['progress'] = 'Processing data through Gemini API...'
        structured_report_path = process_content(username, scraped_data, task_id, tasks)
        if not structured_report_path or not os.path.exists(structured_report_path):
            task['progress'] = 'Failed to process data with Gemini API.'
            task['status'] = 'Failed'
            return

        # Publish the path BEFORE flipping the status: the download endpoint
        # treats 'Completed' as "report_path is ready", so the status must be
        # the last field written.
        task['report_path'] = structured_report_path
        task['progress'] = 'Report generated successfully.'
        task['status'] = 'Completed'

    except Exception:
        # Top-level boundary of the worker thread: record the full traceback
        # and surface a generic failure to the polling client.
        app.logger.exception("Error in background task %s", task_id)
        task['progress'] = 'An unexpected error occurred.'
        task['status'] = 'Failed'

def get_unique_task_id():
    """Return a new random (UUID4) task identifier as a 32-character hex string."""
    fresh_uuid = uuid.uuid4()
    return fresh_uuid.hex

@app.route('/', methods=['GET', 'POST'])
def index():
    """
    Landing page: render the username form, or start a report task on submit.

    On POST the ``reddit_username`` form field is read and stripped. An empty
    value flashes an error and redirects back here; otherwise a new task entry
    is registered in ``tasks``, a worker thread is started, and the client is
    redirected to the progress page for that task.
    """
    # Anything that is not a form submission simply shows the form.
    if request.method != 'POST':
        return render_template('index.html')

    submitted_name = request.form.get('reddit_username', '').strip()
    if not submitted_name:
        flash('Please enter a Reddit username.', 'danger')
        return redirect(url_for('index'))

    # Register the task entry first so the worker finds it when it starts.
    new_task_id = get_unique_task_id()
    tasks[new_task_id] = dict(
        progress='Task started.',
        status='In Progress',
        report_path=None,
        total_posts=0,
        scraped_posts=0,
        total_comments=0,
        scraped_comments=0,
    )

    # Run the scrape + processing pipeline off the request thread.
    worker = threading.Thread(
        target=background_task,
        args=(submitted_name, new_task_id),
    )
    worker.start()

    flash('Your request is being processed. Please wait...', 'info')
    return redirect(url_for('progress_page', task_id=new_task_id))

@app.route('/progress/<task_id>', methods=['GET'])
def progress_page(task_id):
    """
    Show the progress page for a known task.

    Unknown task IDs flash an error and redirect to the index page.
    """
    if task_id in tasks:
        return render_template('progress.html', task_id=task_id)

    flash('Invalid task ID.', 'danger')
    return redirect(url_for('index'))

@app.route('/status/<task_id>', methods=['GET'])
def status(task_id):
    """
    Report the current state of a task as JSON.

    Returns a 404 JSON body for unknown task IDs; otherwise the task's
    ``status``, ``progress`` message and the four post/comment counters,
    with defaults for any field missing from the entry.
    """
    if task_id not in tasks:
        return jsonify({'status': 'Invalid task ID.'}), 404

    entry = tasks[task_id]

    # Counters default to 0 when the entry does not carry them.
    counter_names = ('total_posts', 'scraped_posts', 'total_comments', 'scraped_comments')
    counters = {name: entry.get(name, 0) for name in counter_names}

    payload = {
        'status': entry.get('status', 'Unknown'),
        'progress': entry.get('progress', ''),
        **counters,
    }
    return jsonify(payload)

@app.route('/download/<task_id>', methods=['GET'])
def download(task_id):
    """
    Endpoint to download the generated report.

    Streams the report file for a completed task as a ``text/markdown``
    attachment. After the last chunk has been produced, the report file is
    deleted and the task entry is removed from ``tasks``.

    Redirects (with a flashed message) when the task ID is unknown, the task
    is not yet ``'Completed'``, or the report file is missing.
    """
    # Single lookup instead of "check membership, then subscript": the entry
    # can be removed by a concurrent download between those two steps.
    task = tasks.get(task_id)
    if task is None:
        flash('Invalid task ID.', 'danger')
        return redirect(url_for('index'))
    if task['status'] != 'Completed':
        flash('Report is not ready yet.', 'warning')
        return redirect(url_for('progress_page', task_id=task_id))

    report_path = task['report_path']
    if not report_path or not os.path.exists(report_path):
        flash('Report file not found.', 'danger')
        return redirect(url_for('index'))

    # Define a generator to stream the file and delete it after sending
    def generate():
        with open(report_path, 'rb') as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: f.read(4096), b''):
                yield chunk
        # Cleanup only runs once the file was streamed completely; if the
        # generator is closed early the code below is never reached and the
        # report stays available.
        try:
            os.remove(report_path)
        except FileNotFoundError:
            # Another download of the same report already removed the file.
            pass
        # pop() with a default: the entry may already be gone for the same reason.
        tasks.pop(task_id, None)

    return Response(generate(), mimetype='text/markdown', headers={
        'Content-Disposition': f'attachment; filename="{os.path.basename(report_path)}"'
    })

if __name__ == '__main__':
    # Start Flask's built-in server when this file is executed directly.
    # NOTE(review): debug=True turns on Flask's debug mode (interactive
    # debugger and reloader); confirm this entry point is never used for a
    # publicly reachable deployment.
    app.run(debug=True)