AI-Agent-Book / app.py
Cuong2004's picture
init project
ded29b0
import os
import pandas as pd
from flask import Flask, render_template, request, redirect, url_for, flash, jsonify
from werkzeug.utils import secure_filename
import io
from dotenv import load_dotenv
import logging
import sys
# Load utilities
from utils.preprocess import preprocess_csv
from utils.categorizer import categorize_titles
from utils.analyzer import analyze_niches
from utils.generator import generate_titles
from utils.db_manager import (
init_db,
save_processed_data,
save_categorized_data,
save_analysis_results,
save_generated_titles,
get_file_from_db
)
# Load environment variables
load_dotenv()

# Configure logging to stdout only
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

app = Flask(__name__)
# Prefer a stable key supplied through the SECRET_KEY environment variable.
# A key generated at import time differs for every process and every restart,
# so session/flash cookies signed by one worker are rejected by another.
# The random key is kept only as a fallback when SECRET_KEY is unset or empty.
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(24)

# ALLOWED_EXTENSIONS for file uploads (compared in lower case by allowed_file)
ALLOWED_EXTENSIONS = {'csv'}
# Database bootstrap helper - meant to be invoked once the app object exists
def init_app(app):
    """Call ``init_db()`` while *app*'s application context is active."""
    context = app.app_context()
    with context:
        init_db()
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared in lower case.
    A name without any dot is never allowed.
    """
    _, dot, extension = filename.rpartition('.')
    if dot != '.':
        # No dot at all: there is no extension to check.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
@app.route('/')
def index():
    """Render the home page with the newest stored file ID of each file type.

    For every file type the latest record is requested from the database and
    its ``id`` (or None when there is no record) is passed to ``index.html``.
    If anything in that process raises, the error is logged and the template
    is rendered without any file IDs.
    """
    logger.info("User accessed the home page")

    # (template variable, file type) pairs, in the order they are looked up.
    latest_lookups = (
        ('preprocessed_file_id', 'preprocessed'),
        ('categorized_file_id', 'categorized'),
        ('niche_file_id', 'niche_analysis'),
        ('subniche_file_id', 'subniche_analysis'),
        ('generated_file_id', 'generated'),
    )

    try:
        template_args = {}
        for template_var, file_type in latest_lookups:
            record = get_file_from_db(file_type=file_type, latest=True)
            template_args[template_var] = record.get('id') if record else None
        return render_template('index.html', **template_args)
    except Exception as e:
        logger.error(f"Error loading file IDs for index page: {e}")
        return render_template('index.html')
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept an uploaded CSV file, store it as a 'raw' file and open its process page.

    The upload is read fully into memory and handed to ``save_processed_data``;
    the value that call returns is used as the file ID in the redirect to the
    ``process`` page.

    Every rejection path (missing file part, empty filename, disallowed
    extension) flashes an error and redirects to the index page. This route
    only accepts POST, so redirecting back to ``request.url`` would make the
    browser issue a GET to ``/upload`` and receive 405 Method Not Allowed
    instead of showing the flashed message.
    """
    logger.info("User attempting to upload a file")

    if 'file' not in request.files:
        logger.warning("No file part in request")
        flash('Không có file nào được cung cấp!', 'error')
        return redirect(url_for('index'))

    file = request.files['file']
    if file.filename == '':
        logger.warning("No file selected")
        flash('Không có file nào được chọn!', 'error')
        return redirect(url_for('index'))

    if not (file and allowed_file(file.filename)):
        logger.warning(f"Invalid file type: {file.filename}")
        flash('File không đúng định dạng (chỉ chấp nhận .csv)', 'error')
        return redirect(url_for('index'))

    filename = secure_filename(file.filename)
    # Read file directly into memory
    file_data = file.read()
    # Persist the raw bytes; the returned value identifies the stored file
    file_id = save_processed_data(filename, file_data, 'raw')
    logger.info(f"File uploaded successfully: {filename}, ID: {file_id}")
    flash(f'File {filename} đã được tải lên thành công!', 'success')
    return redirect(url_for('process', filename=filename, file_id=file_id))
@app.route('/process/<filename>')
def process(filename):
    """Render the processing page for *filename*.

    ``file_id`` must be present in the query string; otherwise an error is
    flashed and the user is redirected to the index page. The per-stage file
    IDs are taken from the query string when given, and otherwise fall back
    to the ``id`` of the latest stored file of the matching type (None when
    no such file exists).
    """
    file_id = request.args.get('file_id')
    logger.info(f"User accessing process page for file: {filename}, ID: {file_id}")

    if not file_id:
        logger.warning("No file ID provided")
        flash('File ID không tồn tại!', 'error')
        return redirect(url_for('index'))

    # (query parameter / template variable, file type) pairs, in lookup order.
    stage_types = (
        ('preprocessed_file_id', 'preprocessed'),
        ('categorized_file_id', 'categorized'),
        ('niche_file_id', 'niche_analysis'),
        ('subniche_file_id', 'subniche_analysis'),
        ('generated_file_id', 'generated'),
    )

    # Start from whatever the caller supplied in the query string ...
    stage_ids = {name: request.args.get(name) for name, _ in stage_types}

    # ... and fill every gap with the most recent stored file of that type.
    for name, file_type in stage_types:
        if stage_ids[name]:
            continue
        latest = get_file_from_db(file_type=file_type, latest=True)
        stage_ids[name] = latest.get('id') if latest else None

    return render_template(
        'process.html',
        filename=filename,
        file_id=file_id,
        **stage_ids
    )
@app.route('/preprocess', methods=['POST'])
def run_preprocess():
    """Preprocess a stored file and save the result as a 'preprocessed' file.

    Reads ``file_id`` from the form, loads that file from the database, runs
    ``preprocess_csv`` on its content, serialises the resulting dataframe to
    CSV in memory and stores it. On success the user is redirected to the
    process page for the new file; on any error the message is flashed and
    the user is sent to the index page.
    """
    file_id = request.form.get('file_id')
    logger.info(f"User initiated preprocessing for file ID: {file_id}")

    if not file_id:
        logger.warning("No file ID provided")
        flash('File ID không tồn tại!', 'error')
        return redirect(url_for('index'))

    try:
        record = get_file_from_db(file_id)
        if not record:
            raise ValueError(f"Could not retrieve file with ID {file_id}")

        filename = record.get('filename')
        content = record.get('content')

        logger.info(f"Starting preprocessing for {filename}")
        # Wrap the stored bytes in a file-like object for the preprocessor
        preprocessed_df = preprocess_csv(io.BytesIO(content))

        # Serialise the dataframe to CSV bytes without touching the disk
        buffer = io.BytesIO()
        preprocessed_df.to_csv(buffer, index=False)

        logger.info("Saving preprocessed file to database")
        new_file_id = save_processed_data(
            f"preprocessed_{filename}",
            buffer.getvalue(),
            'preprocessed'
        )
        logger.info(f"Preprocessed file saved with ID: {new_file_id}")

        flash('Tiền xử lý hoàn tất!', 'success')
        return redirect(url_for(
            'process',
            filename=filename,
            file_id=new_file_id,
            preprocessed_file_id=new_file_id
        ))
    except Exception as e:
        logger.exception(f"Error preprocessing file: {str(e)}")
        flash(f'Lỗi khi tiền xử lý: {str(e)}', 'error')
        return redirect(url_for('index'))
@app.route('/categorize', methods=['POST'])
def run_categorize():
    """Categorize the titles of a stored file and save the result.

    Form fields:
        file_id: ID of the stored file to categorize (required).
        batch_size: positive integer passed to ``categorize_titles`` (default 10).
        batch_delay: non-negative integer passed to ``categorize_titles`` (default 30).

    The numeric fields are parsed and validated inside the ``try`` block so
    that a malformed value produces a flashed error and a redirect to the
    index page instead of an unhandled server error. On success the
    categorized CSV is stored and the user is redirected to the process page
    for the new file.
    """
    file_id = request.form.get('file_id')
    logger.info(f"User initiated categorization for file ID: {file_id}")

    if not file_id:
        logger.warning("No file ID provided")
        flash('File ID không tồn tại!', 'error')
        return redirect(url_for('index'))

    try:
        # int() raises ValueError on non-numeric input; handled below.
        batch_size = int(request.form.get('batch_size', 10))
        batch_delay = int(request.form.get('batch_delay', 30))
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        if batch_delay < 0:
            raise ValueError("batch_delay must not be negative")
        logger.info(f"Parameters: batch_size={batch_size}, batch_delay={batch_delay}")

        # Get the file data from database
        file_data = get_file_from_db(file_id)
        if not file_data:
            raise ValueError(f"Could not retrieve file with ID {file_id}")

        filename = file_data.get('filename')
        content = file_data.get('content')

        # Convert bytes to file-like object for processing
        file_obj = io.BytesIO(content)

        # Check API keys
        from utils.api_manager import ApiKeyManager
        api_manager = ApiKeyManager()
        logger.info(f"API Key Manager initialized with {len(api_manager.api_keys)} keys")

        # Log only a very short prefix of the first key: enough to tell keys
        # apart while keeping almost all of the secret out of the logs.
        if api_manager.api_keys:
            first_key = api_manager.api_keys[0]
            masked_key = first_key[:4] + "..." if len(first_key) > 10 else "..."
            logger.info(f"First API key available: {masked_key}")

        # Process directly in memory
        categorized_df, successful_rows, failed_rows = categorize_titles(
            file_obj, batch_size, batch_delay
        )

        # Convert the categorized dataframe to CSV in memory
        categorized_csv = io.BytesIO()
        categorized_df.to_csv(categorized_csv, index=False)

        # Save to database
        logger.info(f"Categorization complete. Successful rows: {successful_rows}, Failed rows: {failed_rows}")
        logger.info("Saving categorized file to database")
        new_file_id = save_categorized_data(
            f"categorized_{filename}",
            categorized_csv.getvalue(),
            'categorized'
        )
        logger.info(f"Categorized file saved with ID: {new_file_id}")

        flash('Phân loại tiêu đề hoàn tất!', 'success')
        return redirect(url_for(
            'process',
            filename=filename,
            file_id=new_file_id,
            categorized_file_id=new_file_id
        ))
    except Exception as e:
        logger.error(f"Error categorizing file: {str(e)}", exc_info=True)
        flash(f'Lỗi khi phân loại: {str(e)}', 'error')
        return redirect(url_for('index'))
@app.route('/analyze', methods=['POST'])
def run_analyze():
    """Run niche analysis on a stored file and save both result tables.

    Reads ``file_id`` from the form, loads the file from the database and
    passes its content to ``analyze_niches``, which yields a niche dataframe
    and a subniche dataframe. Both are serialised to CSV in memory and stored
    as 'niche_analysis' and 'subniche_analysis' files. On success the user is
    redirected to the process page; on any error the message is flashed and
    the user is sent to the index page.
    """
    file_id = request.form.get('file_id')
    logger.info(f"User initiated analysis for file ID: {file_id}")

    if not file_id:
        logger.warning("No file ID provided")
        flash('File ID không tồn tại!', 'error')
        return redirect(url_for('index'))

    try:
        record = get_file_from_db(file_id)
        if not record:
            raise ValueError(f"Could not retrieve file with ID {file_id}")

        filename = record.get('filename')
        content = record.get('content')
        source = io.BytesIO(content)

        logger.info(f"Starting analysis for {filename}")
        niche_df, subniche_df = analyze_niches(source)

        # Serialise both result tables to CSV bytes, niche first
        csv_payloads = []
        for frame in (niche_df, subniche_df):
            buffer = io.BytesIO()
            frame.to_csv(buffer, index=False)
            csv_payloads.append(buffer.getvalue())
        niche_bytes, subniche_bytes = csv_payloads

        logger.info("Saving analysis results to database")
        niche_id = save_processed_data(
            "Niche_Ranking_Analysis.csv", niche_bytes, 'niche_analysis'
        )
        subniche_id = save_processed_data(
            "Subniche_Analysis.csv", subniche_bytes, 'subniche_analysis'
        )
        logger.info(f"Analysis saved with IDs: niche={niche_id}, subniche={subniche_id}")

        flash('Phân tích hoàn tất!', 'success')
        return redirect(url_for(
            'process',
            filename=filename,
            file_id=file_id,
            niche_file_id=niche_id,
            subniche_file_id=subniche_id
        ))
    except Exception as e:
        logger.exception(f"Error analyzing data: {str(e)}")
        flash(f'Lỗi khi phân tích: {str(e)}', 'error')
        return redirect(url_for('index'))
@app.route('/generate', methods=['POST'])
def run_generate():
    """Generate titles from a stored niche analysis and save them.

    Form fields:
        niche_file_id: ID of the niche analysis file (required).
        top_niches: integer, default 5.
        bottom_subniches: integer, default 2.
        titles_per_combination: integer, default 2.

    The generated dataframe is serialised to CSV in memory and stored via
    ``save_generated_titles``. On success the user is redirected to the
    process page; on any error (including non-numeric parameters) the message
    is flashed and the user is sent to the index page.
    """
    niche_file_id = request.form.get('niche_file_id')
    logger.info(f"User initiated title generation using niche file ID: {niche_file_id}")

    if not niche_file_id:
        logger.warning("Niche file ID not provided")
        flash('File phân tích niche không tồn tại!', 'error')
        return redirect(url_for('index'))

    try:
        niche_record = get_file_from_db(niche_file_id)
        if not niche_record:
            raise ValueError(f"Could not retrieve niche analysis with ID {niche_file_id}")

        niche_source = io.BytesIO(niche_record.get('content'))

        # Generation parameters, read in the order they are passed on
        form = request.form
        top_niches = int(form.get('top_niches', 5))
        bottom_subniches = int(form.get('bottom_subniches', 2))
        titles_per_combination = int(form.get('titles_per_combination', 2))

        logger.info("Starting title generation with parameters:")
        logger.info(f"- Top niches: {top_niches}")
        logger.info(f"- Bottom subniches: {bottom_subniches}")
        logger.info(f"- Titles per combination: {titles_per_combination}")

        titles_df = generate_titles(
            niche_source, top_niches, bottom_subniches, titles_per_combination
        )

        # Serialise the generated titles to CSV bytes in memory
        buffer = io.BytesIO()
        titles_df.to_csv(buffer, index=False)

        logger.info("Saving generated titles to database")
        titles_id = save_generated_titles("Generated_Titles.csv", buffer.getvalue())
        logger.info(f"Generated titles saved with ID: {titles_id}")

        flash('Tạo tiêu đề hoàn tất!', 'success')

        # No lookup of the originally uploaded file is performed here: the
        # niche analysis ID itself is reused as the process page's file_id.
        return redirect(url_for(
            'process',
            filename="Generated_Titles.csv",
            file_id=niche_file_id,
            generated_file_id=titles_id,
            niche_file_id=niche_file_id
        ))
    except Exception as e:
        logger.exception(f"Error generating titles: {str(e)}")
        flash(f'Lỗi khi tạo tiêu đề: {str(e)}', 'error')
        return redirect(url_for('index'))
@app.route('/download/<file_id>')
def download_file(file_id):
    """Send the stored file identified by *file_id* as a CSV attachment.

    When no file is found, an error is flashed and the user is redirected to
    the index page. Any exception is logged, flashed and handled the same way.
    """
    try:
        record = get_file_from_db(file_id)
        if not record:
            logger.warning(f"File not found with ID: {file_id}")
            flash('File không tồn tại!', 'error')
            return redirect(url_for('index'))

        filename = record.get('filename')
        content = record.get('content')

        # Build the attachment response around the stored bytes
        download_headers = {'Content-Disposition': f'attachment;filename={filename}'}
        response = app.response_class(
            content,
            mimetype='text/csv',
            headers=download_headers
        )
        logger.info(f"User downloading file: {filename}")
        return response
    except Exception as e:
        logger.exception(f"Error downloading file: {str(e)}")
        flash(f'Lỗi khi tải file: {str(e)}', 'error')
        return redirect(url_for('index'))
if __name__ == '__main__':
    # Run database initialization
    init_app(app)
    # Get port from environment variable or use 7860 as default
    port = int(os.environ.get('PORT', 7860))
    # The server binds to every interface, so the interactive debugger (which
    # allows executing code from the browser) must not be on by default.
    # Enable it explicitly for local development with FLASK_DEBUG=1.
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(host='0.0.0.0', port=port, debug=debug)