Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| from flask import Flask, render_template, request, redirect, url_for, flash, jsonify | |
| from werkzeug.utils import secure_filename | |
| import io | |
| from dotenv import load_dotenv | |
| import logging | |
| import sys | |
| # Load utilities | |
| from utils.preprocess import preprocess_csv | |
| from utils.categorizer import categorize_titles | |
| from utils.analyzer import analyze_niches | |
| from utils.generator import generate_titles | |
| from utils.db_manager import ( | |
| init_db, | |
| save_processed_data, | |
| save_categorized_data, | |
| save_analysis_results, | |
| save_generated_titles, | |
| get_file_from_db | |
| ) | |
# Load environment variables (e.g. from a .env file) before anything below
# reads os.environ.
load_dotenv()

# Configure logging to stdout only
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

app = Flask(__name__)
# Prefer a stable secret key taken from the environment. A key regenerated on
# every start invalidates existing session cookies (and the flash messages
# stored in them) after a restart, and differs between worker processes.
# The random fallback keeps the app bootable when SECRET_KEY is not set.
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(24)

# ALLOWED_EXTENSIONS for file uploads
ALLOWED_EXTENSIONS = {'csv'}
# Database initialization hook - invoked once the Flask app object exists
def init_app(app):
    """Call ``init_db()`` while an application context of *app* is active."""
    context = app.app_context()
    with context:
        init_db()
def allowed_file(filename):
    """Tell whether *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The text after the last dot is compared case-insensitively. A name that
    contains no dot at all is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
def index():
    """Render ``index.html`` with the id of the latest stored file of each type.

    For every file type listed below, ``get_file_from_db(file_type=...,
    latest=True)`` is queried and the ``'id'`` of the returned record (or
    ``None`` when nothing is returned) is handed to the template. If any
    lookup or the rendering itself raises, the error is logged and the
    template is rendered without any ids.

    NOTE(review): no route decorator is visible for this view in the source
    as seen here - confirm how it is registered with the Flask app.
    """
    logger.info("User accessed the home page")

    # (template variable, file type) pairs, in the order they are queried
    latest_lookups = (
        ('preprocessed_file_id', 'preprocessed'),
        ('categorized_file_id', 'categorized'),
        ('niche_file_id', 'niche_analysis'),
        ('subniche_file_id', 'subniche_analysis'),
        ('generated_file_id', 'generated'),
    )

    try:
        template_ids = {}
        for variable, file_type in latest_lookups:
            record = get_file_from_db(file_type=file_type, latest=True)
            template_ids[variable] = record.get('id') if record else None
        return render_template('index.html', **template_ids)
    except Exception as e:
        logger.error(f"Error loading file IDs for index page: {e}")
        return render_template('index.html')
def upload_file():
    """Store an uploaded CSV in the database and redirect to its process page.

    Reads the ``file`` part of the current request. A missing part, an empty
    filename, or a filename rejected by ``allowed_file`` flashes an error and
    redirects back to the request URL. Otherwise the file content is read
    into memory and stored through ``save_processed_data`` with type
    ``'raw'``, and the user is redirected to the ``process`` endpoint with
    the sanitized filename and the id returned by the save call.

    A failure while reading or storing the upload is logged, flashed, and
    answered with a redirect to ``index``, matching the other handlers in
    this module.
    """
    logger.info("User attempting to upload a file")

    if 'file' not in request.files:
        logger.warning("No file part in request")
        flash('Không có file nào được cung cấp!', 'error')
        return redirect(request.url)

    file = request.files['file']
    if file.filename == '':
        logger.warning("No file selected")
        flash('Không có file nào được chọn!', 'error')
        return redirect(request.url)

    if not file or not allowed_file(file.filename):
        logger.warning(f"Invalid file type: {file.filename}")
        flash('File không đúng định dạng (chỉ chấp nhận .csv)', 'error')
        return redirect(request.url)

    filename = secure_filename(file.filename)
    try:
        # Read the upload fully into memory and persist it in the database
        file_data = file.read()
        file_id = save_processed_data(filename, file_data, 'raw')
    except Exception as e:
        # Without this guard a storage error would surface as a bare 500
        logger.error(f"Error uploading file: {str(e)}", exc_info=True)
        flash(f'Lỗi khi tải file lên: {str(e)}', 'error')
        return redirect(url_for('index'))

    logger.info(f"File uploaded successfully: {filename}, ID: {file_id}")
    flash(f'File {filename} đã được tải lên thành công!', 'success')
    return redirect(url_for('process', filename=filename, file_id=file_id))
def process(filename):
    """Render ``process.html`` for *filename* and the file id in the query string.

    ``file_id`` is required as a query parameter; when it is absent an error
    is flashed and the user is redirected to ``index``. Each of the related
    file ids may also be passed as a query parameter. Any that is missing or
    empty is replaced by the ``'id'`` of the latest record of the matching
    type from ``get_file_from_db`` (``None`` when no record is returned).
    """
    file_id = request.args.get('file_id')
    logger.info(f"User accessing process page for file: {filename}, ID: {file_id}")

    if not file_id:
        logger.warning("No file ID provided")
        flash('File ID không tồn tại!', 'error')
        return redirect(url_for('index'))

    # Query parameter / template variable -> file type used as fallback lookup
    fallback_types = {
        'preprocessed_file_id': 'preprocessed',
        'categorized_file_id': 'categorized',
        'niche_file_id': 'niche_analysis',
        'subniche_file_id': 'subniche_analysis',
        'generated_file_id': 'generated',
    }

    related_ids = {}
    for param, file_type in fallback_types.items():
        value = request.args.get(param)
        if not value:
            # Not supplied by the caller: fall back to the most recent file
            latest = get_file_from_db(file_type=file_type, latest=True)
            value = latest.get('id') if latest else None
        related_ids[param] = value

    return render_template(
        'process.html',
        filename=filename,
        file_id=file_id,
        **related_ids,
    )
def run_preprocess():
    """Preprocess the stored file named by the ``file_id`` form field.

    The record is fetched with ``get_file_from_db``, its content is handed to
    ``preprocess_csv`` as an in-memory byte stream, and the result is written
    back as CSV through ``save_processed_data`` with type ``'preprocessed'``.
    On success the user is redirected to ``process`` with the new file id.
    A missing form field or any exception flashes an error and redirects to
    ``index``.
    """
    source_id = request.form.get('file_id')
    logger.info(f"User initiated preprocessing for file ID: {source_id}")

    if not source_id:
        logger.warning("No file ID provided")
        flash('File ID không tồn tại!', 'error')
        return redirect(url_for('index'))

    try:
        record = get_file_from_db(source_id)
        if not record:
            raise ValueError(f"Could not retrieve file with ID {source_id}")

        filename = record.get('filename')
        raw_stream = io.BytesIO(record.get('content'))

        logger.info(f"Starting preprocessing for {filename}")
        preprocessed_df = preprocess_csv(raw_stream)

        # Serialize the result to CSV bytes without touching the disk
        csv_buffer = io.BytesIO()
        preprocessed_df.to_csv(csv_buffer, index=False)
        csv_bytes = csv_buffer.getvalue()

        logger.info("Saving preprocessed file to database")
        new_file_id = save_processed_data(
            f"preprocessed_{filename}", csv_bytes, 'preprocessed'
        )
        logger.info(f"Preprocessed file saved with ID: {new_file_id}")

        flash('Tiền xử lý hoàn tất!', 'success')
        return redirect(url_for(
            'process',
            filename=filename,
            file_id=new_file_id,
            preprocessed_file_id=new_file_id,
        ))
    except Exception as e:
        logger.error(f"Error preprocessing file: {e}", exc_info=True)
        flash(f'Lỗi khi tiền xử lý: {e}', 'error')
        return redirect(url_for('index'))
def run_categorize():
    """Categorize the titles of the stored file named by the ``file_id`` form field.

    Form fields:
        file_id: id of the stored file to categorize (required).
        batch_size: integer, defaults to 10; must be at least 1.
        batch_delay: integer, defaults to 30; must not be negative.

    The file content is passed to ``categorize_titles`` as an in-memory byte
    stream together with the two batch parameters. The resulting frame is
    saved as CSV through ``save_categorized_data`` and the user is redirected
    to ``process`` with the new file id. A missing ``file_id`` or any
    exception, including invalid batch parameters, flashes an error and
    redirects to ``index``.
    """
    file_id = request.form.get('file_id')
    logger.info(f"User initiated categorization for file ID: {file_id}")

    if not file_id:
        logger.warning("No file ID provided")
        flash('File ID không tồn tại!', 'error')
        return redirect(url_for('index'))

    try:
        # Parse user-supplied numbers inside the try block so that an empty or
        # non-numeric field is reported to the user instead of crashing the
        # request with an unhandled ValueError.
        batch_size = int(request.form.get('batch_size', 10))
        batch_delay = int(request.form.get('batch_delay', 30))
        if batch_size < 1:
            raise ValueError(f"batch_size must be at least 1, got {batch_size}")
        if batch_delay < 0:
            raise ValueError(f"batch_delay must not be negative, got {batch_delay}")
        logger.info(f"Parameters: batch_size={batch_size}, batch_delay={batch_delay}")

        # Get the file data from database
        file_data = get_file_from_db(file_id)
        if not file_data:
            raise ValueError(f"Could not retrieve file with ID {file_id}")

        filename = file_data.get('filename')
        content = file_data.get('content')

        # Convert bytes to file-like object for processing
        file_obj = io.BytesIO(content)

        # Check API keys
        from utils.api_manager import ApiKeyManager
        api_manager = ApiKeyManager()
        logger.info(f"API Key Manager initialized with {len(api_manager.api_keys)} keys")

        # NOTE(review): this writes the first 10 characters of an API key to
        # the log. Consider logging only the key count if logs are shared.
        if api_manager.api_keys:
            first_key = api_manager.api_keys[0]
            masked_key = first_key[:10] + "..." if len(first_key) > 10 else "..."
            logger.info(f"First API key available: {masked_key}")

        # Process directly in memory
        categorized_df, successful_rows, failed_rows = categorize_titles(
            file_obj, batch_size, batch_delay
        )

        # Convert the categorized dataframe to CSV in memory
        categorized_csv = io.BytesIO()
        categorized_df.to_csv(categorized_csv, index=False)

        # Save to database
        logger.info(f"Categorization complete. Successful rows: {successful_rows}, Failed rows: {failed_rows}")
        logger.info("Saving categorized file to database")
        new_file_id = save_categorized_data(
            f"categorized_{filename}", categorized_csv.getvalue(), 'categorized'
        )
        logger.info(f"Categorized file saved with ID: {new_file_id}")

        flash('Phân loại tiêu đề hoàn tất!', 'success')
        return redirect(url_for(
            'process',
            filename=filename,
            file_id=new_file_id,
            categorized_file_id=new_file_id,
        ))
    except Exception as e:
        logger.error(f"Error categorizing file: {str(e)}", exc_info=True)
        flash(f'Lỗi khi phân loại: {str(e)}', 'error')
        return redirect(url_for('index'))
def run_analyze():
    """Run the niche analysis on the stored file named by the ``file_id`` form field.

    The file content is passed to ``analyze_niches`` as an in-memory byte
    stream. The two frames it returns are stored as CSV through
    ``save_processed_data`` with types ``'niche_analysis'`` and
    ``'subniche_analysis'``. On success the user is redirected to ``process``
    with the original file id plus both new ids. A missing form field or any
    exception flashes an error and redirects to ``index``.
    """
    file_id = request.form.get('file_id')
    logger.info(f"User initiated analysis for file ID: {file_id}")

    if not file_id:
        logger.warning("No file ID provided")
        flash('File ID không tồn tại!', 'error')
        return redirect(url_for('index'))

    def _as_csv_bytes(frame):
        # Serialize a frame to CSV bytes in memory (no index column)
        buffer = io.BytesIO()
        frame.to_csv(buffer, index=False)
        return buffer.getvalue()

    try:
        record = get_file_from_db(file_id)
        if not record:
            raise ValueError(f"Could not retrieve file with ID {file_id}")

        filename = record.get('filename')
        source_stream = io.BytesIO(record.get('content'))

        logger.info(f"Starting analysis for {filename}")
        niche_df, subniche_df = analyze_niches(source_stream)

        niche_bytes = _as_csv_bytes(niche_df)
        subniche_bytes = _as_csv_bytes(subniche_df)

        logger.info("Saving analysis results to database")
        niche_id = save_processed_data(
            "Niche_Ranking_Analysis.csv", niche_bytes, 'niche_analysis'
        )
        subniche_id = save_processed_data(
            "Subniche_Analysis.csv", subniche_bytes, 'subniche_analysis'
        )
        logger.info(f"Analysis saved with IDs: niche={niche_id}, subniche={subniche_id}")

        flash('Phân tích hoàn tất!', 'success')
        return redirect(url_for(
            'process',
            filename=filename,
            file_id=file_id,
            niche_file_id=niche_id,
            subniche_file_id=subniche_id,
        ))
    except Exception as e:
        logger.error(f"Error analyzing data: {e}", exc_info=True)
        flash(f'Lỗi khi phân tích: {e}', 'error')
        return redirect(url_for('index'))
def run_generate():
    """Generate titles from the niche analysis file named by ``niche_file_id``.

    Form fields:
        niche_file_id: id of the stored niche analysis file (required).
        top_niches: integer, defaults to 5.
        bottom_subniches: integer, defaults to 2.
        titles_per_combination: integer, defaults to 2.

    The stored content is passed to ``generate_titles`` as an in-memory byte
    stream along with the three parameters. The result is saved as
    ``Generated_Titles.csv`` through ``save_generated_titles``. On success the
    user is redirected to ``process``, with the niche file id doubling as the
    ``file_id`` of that page. A missing ``niche_file_id`` or any exception
    flashes an error and redirects to ``index``.
    """
    niche_file_id = request.form.get('niche_file_id')
    logger.info(f"User initiated title generation using niche file ID: {niche_file_id}")

    if not niche_file_id:
        logger.warning("Niche file ID not provided")
        flash('File phân tích niche không tồn tại!', 'error')
        return redirect(url_for('index'))

    try:
        niche_record = get_file_from_db(niche_file_id)
        if not niche_record:
            raise ValueError(f"Could not retrieve niche analysis with ID {niche_file_id}")

        niche_stream = io.BytesIO(niche_record.get('content'))

        # Generation parameters, each with the same default as the form
        form = request.form
        top_niches = int(form.get('top_niches', 5))
        bottom_subniches = int(form.get('bottom_subniches', 2))
        titles_per_combination = int(form.get('titles_per_combination', 2))

        logger.info("Starting title generation with parameters:")
        logger.info(f"- Top niches: {top_niches}")
        logger.info(f"- Bottom subniches: {bottom_subniches}")
        logger.info(f"- Titles per combination: {titles_per_combination}")

        titles_df = generate_titles(
            niche_stream,
            top_niches,
            bottom_subniches,
            titles_per_combination,
        )

        # Serialize the generated titles to CSV bytes in memory
        csv_buffer = io.BytesIO()
        titles_df.to_csv(csv_buffer, index=False)

        logger.info("Saving generated titles to database")
        titles_id = save_generated_titles("Generated_Titles.csv", csv_buffer.getvalue())
        logger.info(f"Generated titles saved with ID: {titles_id}")

        flash('Tạo tiêu đề hoàn tất!', 'success')

        # The niche analysis id is reused as the process page's file_id
        return redirect(url_for(
            'process',
            filename="Generated_Titles.csv",
            file_id=niche_file_id,
            generated_file_id=titles_id,
            niche_file_id=niche_file_id,
        ))
    except Exception as e:
        logger.error(f"Error generating titles: {e}", exc_info=True)
        flash(f'Lỗi khi tạo tiêu đề: {e}', 'error')
        return redirect(url_for('index'))
def download_file(file_id):
    """Send the stored file *file_id* to the client as a CSV attachment.

    The record is fetched with ``get_file_from_db``. When nothing is
    returned, or when any exception occurs, an error is flashed and the user
    is redirected to ``index``. Otherwise the record's ``'content'`` is
    returned with mimetype ``text/csv`` and a ``Content-Disposition`` header
    carrying the record's ``'filename'``.
    """
    try:
        record = get_file_from_db(file_id)
        if not record:
            logger.warning(f"File not found with ID: {file_id}")
            flash('File không tồn tại!', 'error')
            return redirect(url_for('index'))

        filename = record.get('filename')
        disposition = {'Content-Disposition': f'attachment;filename={filename}'}

        # Build the download response straight from the stored content
        download = app.response_class(
            record.get('content'),
            mimetype='text/csv',
            headers=disposition,
        )
        logger.info(f"User downloading file: {filename}")
        return download
    except Exception as e:
        logger.error(f"Error downloading file: {e}", exc_info=True)
        flash(f'Lỗi khi tải file: {e}', 'error')
        return redirect(url_for('index'))
if __name__ == '__main__':
    # Run database initialization
    init_app(app)

    # Get port from environment variable or use 7860 as default
    port = int(os.environ.get('PORT', 7860))

    # Debug mode is opt-in via FLASK_DEBUG. With debug=True the Werkzeug
    # interactive debugger allows arbitrary code execution, which must never
    # be exposed on a server bound to all interfaces (0.0.0.0).
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}

    app.run(host='0.0.0.0', port=port, debug=debug)