Spaces:

Cuong2004
/

AI-Agent-Book

Sleeping

App Files Files Community

AI-Agent-Book / app.py

Cuong2004

init project

ded29b0 10 months ago

raw

history blame contribute delete

16.9 kB

	import os
	import pandas as pd
	from flask import Flask, render_template, request, redirect, url_for, flash, jsonify
	from werkzeug.utils import secure_filename
	import io
	from dotenv import load_dotenv
	import logging
	import sys

	# Load utilities
	from utils.preprocess import preprocess_csv
	from utils.categorizer import categorize_titles
	from utils.analyzer import analyze_niches
	from utils.generator import generate_titles
	from utils.db_manager import (
	init_db,
	save_processed_data,
	save_categorized_data,
	save_analysis_results,
	save_generated_titles,
	get_file_from_db
	)

	# Load environment variables
	load_dotenv()

	# Configure logging to stdout only
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	    handlers=[
	        logging.StreamHandler(sys.stdout)
	    ]
	)
	logger = logging.getLogger(__name__)

	app = Flask(__name__)
	# The secret key signs the session cookie, which is also where flash()
	# messages live. Prefer a stable key from the SECRET_KEY environment variable
	# so sessions stay valid across restarts and across worker processes; fall
	# back to a random per-process key when it is not configured.
	app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(24)

	# ALLOWED_EXTENSIONS for file uploads
	ALLOWED_EXTENSIONS = {'csv'}

	# Initialize database function - will be called after app creation
	def init_app(app):
	    """Call ``init_db()`` while an application context of *app* is active."""
	    context = app.app_context()
	    with context:
	        init_db()

	def allowed_file(filename):
	    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

	    The extension is whatever follows the last '.', compared in lower case.
	    A name that contains no '.' at all is rejected.
	    """
	    _, dot, extension = filename.rpartition('.')
	    if not dot:
	        return False
	    return extension.lower() in ALLOWED_EXTENSIONS

	@app.route('/')
	def index():
	    """Render the home page with the newest stored file ID of each type.

	    For every file type the latest record is requested with
	    ``get_file_from_db(file_type=..., latest=True)`` and its ``'id'`` is handed
	    to ``index.html`` (``None`` when no record comes back). If anything in
	    that sequence raises, the error is logged with its traceback and the page
	    is rendered without any IDs.
	    """
	    logger.info("User accessed the home page")

	    # Template variable name -> file type to look up
	    latest_types = {
	        'preprocessed_file_id': 'preprocessed',
	        'categorized_file_id': 'categorized',
	        'niche_file_id': 'niche_analysis',
	        'subniche_file_id': 'subniche_analysis',
	        'generated_file_id': 'generated',
	    }

	    try:
	        file_ids = {}
	        for template_var, file_type in latest_types.items():
	            record = get_file_from_db(file_type=file_type, latest=True)
	            file_ids[template_var] = record.get('id') if record else None

	        return render_template('index.html', **file_ids)
	    except Exception as e:
	        # exc_info=True keeps the traceback, matching the other route handlers
	        logger.error(f"Error loading file IDs for index page: {e}", exc_info=True)
	        return render_template('index.html')

	@app.route('/upload', methods=['POST'])
	def upload_file():
	    """Store an uploaded CSV in the database and continue to its process page.

	    Reads the multipart form field ``file``. On success the raw bytes are saved
	    with ``save_processed_data(filename, data, 'raw')`` and the client is
	    redirected to the ``process`` view for the new file ID. Every rejected
	    upload (missing part, empty name, extension not allowed, error while
	    reading or storing) flashes an error and redirects to the index page.
	    """
	    logger.info("User attempting to upload a file")

	    # This route only accepts POST. Redirecting to request.url would make the
	    # browser issue a GET against /upload and end in "405 Method Not Allowed",
	    # so failures are sent to the index page instead.
	    home_url = url_for('index')

	    if 'file' not in request.files:
	        logger.warning("No file part in request")
	        flash('Không có file nào được cung cấp!', 'error')
	        return redirect(home_url)

	    file = request.files['file']
	    if file.filename == '':
	        logger.warning("No file selected")
	        flash('Không có file nào được chọn!', 'error')
	        return redirect(home_url)

	    if not allowed_file(file.filename):
	        logger.warning(f"Invalid file type: {file.filename}")
	        flash('File không đúng định dạng (chỉ chấp nhận .csv)', 'error')
	        return redirect(home_url)

	    filename = secure_filename(file.filename)
	    try:
	        # Read file directly into memory and store it in the database
	        file_data = file.read()
	        file_id = save_processed_data(filename, file_data, 'raw')
	    except Exception as e:
	        # Same pattern as the other routes: log with traceback, flash, go home
	        logger.error(f"Error uploading file: {str(e)}", exc_info=True)
	        flash(f'Lỗi khi tải file lên: {str(e)}', 'error')
	        return redirect(home_url)

	    logger.info(f"File uploaded successfully: {filename}, ID: {file_id}")
	    flash(f'File {filename} đã được tải lên thành công!', 'success')
	    return redirect(url_for('process', filename=filename, file_id=file_id))

	@app.route('/process/<filename>')
	def process(filename):
	    """Render the processing page for *filename*.

	    Requires a ``file_id`` query parameter; without it an error is flashed and
	    the client is redirected to the index page. The IDs of the related files
	    are taken from the query string, and each one that is missing falls back
	    to the ``'id'`` of the latest record of that type (``None`` if there is
	    no such record).
	    """
	    file_id = request.args.get('file_id')
	    logger.info(f"User accessing process page for file: {filename}, ID: {file_id}")

	    if not file_id:
	        logger.warning("No file ID provided")
	        flash('File ID không tồn tại!', 'error')
	        return redirect(url_for('index'))

	    # (query parameter / template variable, file type used for the fallback)
	    related_types = (
	        ('preprocessed_file_id', 'preprocessed'),
	        ('categorized_file_id', 'categorized'),
	        ('niche_file_id', 'niche_analysis'),
	        ('subniche_file_id', 'subniche_analysis'),
	        ('generated_file_id', 'generated'),
	    )

	    related_ids = {}
	    for param, file_type in related_types:
	        value = request.args.get(param)
	        if not value:
	            # Not supplied by the caller: use the most recent file of this type
	            latest = get_file_from_db(file_type=file_type, latest=True)
	            value = latest.get('id') if latest else None
	        related_ids[param] = value

	    return render_template(
	        'process.html',
	        filename=filename,
	        file_id=file_id,
	        **related_ids
	    )

	@app.route('/preprocess', methods=['POST'])
	def run_preprocess():
	    """Preprocess a stored file and save the result as a new 'preprocessed' file.

	    Reads ``file_id`` from the form, loads that record, passes its content to
	    ``preprocess_csv`` as an in-memory stream, serialises the returned
	    dataframe to CSV and stores it under ``preprocessed_<filename>``. On
	    success the client is redirected to the process page of the new file; on
	    any error the message is flashed and the client goes back to the index.
	    """
	    file_id = request.form.get('file_id')
	    logger.info(f"User initiated preprocessing for file ID: {file_id}")

	    if not file_id:
	        logger.warning("No file ID provided")
	        flash('File ID không tồn tại!', 'error')
	        return redirect(url_for('index'))

	    try:
	        record = get_file_from_db(file_id)
	        if not record:
	            raise ValueError(f"Could not retrieve file with ID {file_id}")

	        filename = record.get('filename')
	        raw_bytes = record.get('content')

	        logger.info(f"Starting preprocessing for {filename}")

	        # pandas reads from a file-like object, so wrap the stored bytes
	        preprocessed_df = preprocess_csv(io.BytesIO(raw_bytes))

	        # Serialise the dataframe back to CSV bytes, entirely in memory
	        buffer = io.BytesIO()
	        preprocessed_df.to_csv(buffer, index=False)
	        csv_bytes = buffer.getvalue()

	        logger.info("Saving preprocessed file to database")
	        new_file_id = save_processed_data(f"preprocessed_{filename}", csv_bytes, 'preprocessed')
	        logger.info(f"Preprocessed file saved with ID: {new_file_id}")

	        flash('Tiền xử lý hoàn tất!', 'success')
	        target = url_for('process', filename=filename, file_id=new_file_id, preprocessed_file_id=new_file_id)
	        return redirect(target)
	    except Exception as e:
	        logger.error(f"Error preprocessing file: {str(e)}", exc_info=True)
	        flash(f'Lỗi khi tiền xử lý: {str(e)}', 'error')
	        return redirect(url_for('index'))

	@app.route('/categorize', methods=['POST'])
	def run_categorize():
	    """Categorize the titles of a stored file and save the result.

	    Form fields:
	        file_id: ID of the stored file to categorize (required).
	        batch_size: rows per batch, default 10; missing or non-numeric input
	            falls back to the default and values below 1 are raised to 1.
	        batch_delay: delay passed to ``categorize_titles``, default 30;
	            missing or non-numeric input falls back to the default and
	            negative values are raised to 0.

	    The file content is handed to ``categorize_titles`` as an in-memory
	    stream; the returned dataframe is serialised to CSV and stored under
	    ``categorized_<filename>``. On success the client is redirected to the
	    process page of the new file; on any error the message is flashed and the
	    client goes back to the index.
	    """
	    file_id = request.form.get('file_id')
	    # type=int makes the lookup return the default instead of raising on bad
	    # input, so a malformed form value can no longer cause a 500 here.
	    batch_size = max(1, request.form.get('batch_size', 10, type=int))
	    batch_delay = max(0, request.form.get('batch_delay', 30, type=int))

	    logger.info(f"User initiated categorization for file ID: {file_id}")
	    logger.info(f"Parameters: batch_size={batch_size}, batch_delay={batch_delay}")

	    if not file_id:
	        logger.warning("No file ID provided")
	        flash('File ID không tồn tại!', 'error')
	        return redirect(url_for('index'))

	    try:
	        # Get the file data from database
	        file_data = get_file_from_db(file_id)
	        if not file_data:
	            raise ValueError(f"Could not retrieve file with ID {file_id}")

	        filename = file_data.get('filename')
	        content = file_data.get('content')

	        # Convert bytes to file-like object for processing
	        file_obj = io.BytesIO(content)

	        # Check API keys
	        from utils.api_manager import ApiKeyManager
	        api_manager = ApiKeyManager()
	        logger.info(f"API Key Manager initialized with {len(api_manager.api_keys)} keys")

	        # Confirm a key is loaded without leaking it: only the last four
	        # characters are written to the log, never the leading part.
	        if api_manager.api_keys:
	            first_key = api_manager.api_keys[0]
	            masked_key = ("..." + first_key[-4:]) if len(first_key) > 10 else "..."
	            logger.info(f"First API key available: {masked_key}")

	        # Process directly in memory
	        categorized_df, successful_rows, failed_rows = categorize_titles(file_obj, batch_size, batch_delay)

	        # Convert the categorized dataframe to CSV in memory
	        categorized_csv = io.BytesIO()
	        categorized_df.to_csv(categorized_csv, index=False)

	        # Save to database
	        logger.info(f"Categorization complete. Successful rows: {successful_rows}, Failed rows: {failed_rows}")
	        logger.info("Saving categorized file to database")

	        new_file_id = save_categorized_data(f"categorized_{filename}", categorized_csv.getvalue(), 'categorized')
	        logger.info(f"Categorized file saved with ID: {new_file_id}")

	        flash('Phân loại tiêu đề hoàn tất!', 'success')
	        return redirect(url_for('process', filename=filename, file_id=new_file_id, categorized_file_id=new_file_id))
	    except Exception as e:
	        logger.error(f"Error categorizing file: {str(e)}", exc_info=True)
	        flash(f'Lỗi khi phân loại: {str(e)}', 'error')
	        return redirect(url_for('index'))

	@app.route('/analyze', methods=['POST'])
	def run_analyze():
	    """Run the niche analysis on a stored file and save both result tables.

	    Reads ``file_id`` from the form, loads that record and passes its content
	    to ``analyze_niches`` as an in-memory stream. The two returned dataframes
	    are serialised to CSV and stored as ``Niche_Ranking_Analysis.csv``
	    ('niche_analysis') and ``Subniche_Analysis.csv`` ('subniche_analysis').
	    On success the client is redirected to the process page with both new IDs;
	    on any error the message is flashed and the client goes back to the index.
	    """
	    file_id = request.form.get('file_id')
	    logger.info(f"User initiated analysis for file ID: {file_id}")

	    if not file_id:
	        logger.warning("No file ID provided")
	        flash('File ID không tồn tại!', 'error')
	        return redirect(url_for('index'))

	    try:
	        record = get_file_from_db(file_id)
	        if not record:
	            raise ValueError(f"Could not retrieve file with ID {file_id}")

	        filename = record.get('filename')
	        source = io.BytesIO(record.get('content'))

	        logger.info(f"Starting analysis for {filename}")
	        niche_df, subniche_df = analyze_niches(source)

	        # Serialise each result table to CSV bytes, entirely in memory
	        niche_buffer = io.BytesIO()
	        subniche_buffer = io.BytesIO()
	        niche_df.to_csv(niche_buffer, index=False)
	        subniche_df.to_csv(subniche_buffer, index=False)

	        logger.info("Saving analysis results to database")
	        niche_id = save_processed_data("Niche_Ranking_Analysis.csv", niche_buffer.getvalue(), 'niche_analysis')
	        subniche_id = save_processed_data("Subniche_Analysis.csv", subniche_buffer.getvalue(), 'subniche_analysis')

	        logger.info(f"Analysis saved with IDs: niche={niche_id}, subniche={subniche_id}")

	        flash('Phân tích hoàn tất!', 'success')
	        target = url_for(
	            'process',
	            filename=filename,
	            file_id=file_id,
	            niche_file_id=niche_id,
	            subniche_file_id=subniche_id
	        )
	        return redirect(target)
	    except Exception as e:
	        logger.error(f"Error analyzing data: {str(e)}", exc_info=True)
	        flash(f'Lỗi khi phân tích: {str(e)}', 'error')
	        return redirect(url_for('index'))

	@app.route('/generate', methods=['POST'])
	def run_generate():
	    """Generate titles from a stored niche analysis and save them.

	    Form fields:
	        niche_file_id: ID of the niche analysis file (required).
	        top_niches: integer, default 5.
	        bottom_subniches: integer, default 2.
	        titles_per_combination: integer, default 2.

	    The niche file content and the three integers are passed to
	    ``generate_titles``; the returned dataframe is serialised to CSV and
	    stored via ``save_generated_titles`` as ``Generated_Titles.csv``. On
	    success the client is redirected to the process page; on any error
	    (including a non-numeric parameter) the message is flashed and the client
	    goes back to the index.
	    """
	    niche_file_id = request.form.get('niche_file_id')
	    logger.info(f"User initiated title generation using niche file ID: {niche_file_id}")

	    if not niche_file_id:
	        logger.warning("Niche file ID not provided")
	        flash('File phân tích niche không tồn tại!', 'error')
	        return redirect(url_for('index'))

	    try:
	        niche_record = get_file_from_db(niche_file_id)
	        if not niche_record:
	            raise ValueError(f"Could not retrieve niche analysis with ID {niche_file_id}")

	        niche_stream = io.BytesIO(niche_record.get('content'))

	        # Generation parameters; int() raising here is handled by the except below
	        form = request.form
	        top_niches = int(form.get('top_niches', 5))
	        bottom_subniches = int(form.get('bottom_subniches', 2))
	        titles_per_combination = int(form.get('titles_per_combination', 2))

	        logger.info("Starting title generation with parameters:")
	        logger.info(f"- Top niches: {top_niches}")
	        logger.info(f"- Bottom subniches: {bottom_subniches}")
	        logger.info(f"- Titles per combination: {titles_per_combination}")

	        titles_df = generate_titles(niche_stream, top_niches, bottom_subniches, titles_per_combination)

	        # Serialise the generated titles to CSV bytes, entirely in memory
	        buffer = io.BytesIO()
	        titles_df.to_csv(buffer, index=False)

	        logger.info("Saving generated titles to database")
	        titles_id = save_generated_titles("Generated_Titles.csv", buffer.getvalue())
	        logger.info(f"Generated titles saved with ID: {titles_id}")

	        flash('Tạo tiêu đề hoàn tất!', 'success')

	        # No lookup of the originally uploaded file happens here: the niche
	        # analysis file's own ID is what gets passed as file_id.
	        target = url_for(
	            'process',
	            filename="Generated_Titles.csv",
	            file_id=niche_file_id,
	            generated_file_id=titles_id,
	            niche_file_id=niche_file_id
	        )
	        return redirect(target)
	    except Exception as e:
	        logger.error(f"Error generating titles: {str(e)}", exc_info=True)
	        flash(f'Lỗi khi tạo tiêu đề: {str(e)}', 'error')
	        return redirect(url_for('index'))

	@app.route('/download/<file_id>')
	def download_file(file_id):
	    """Send the stored file with ID *file_id* to the client as a CSV download.

	    If no record is returned for the ID, or anything raises while building the
	    response, an error is flashed and the client is redirected to the index.
	    """
	    try:
	        # Get the file from database
	        file_data = get_file_from_db(file_id)
	        if not file_data:
	            logger.warning(f"File not found with ID: {file_id}")
	            flash('File không tồn tại!', 'error')
	            return redirect(url_for('index'))

	        # Fall back to a generic name so the header never reads "filename=None"
	        filename = file_data.get('filename') or 'download.csv'
	        content = file_data.get('content')

	        # Let Werkzeug assemble the Content-Disposition header: it quotes the
	        # filename parameter as needed, which hand-built string interpolation
	        # did not do for names containing spaces or other special characters.
	        response = app.response_class(content, mimetype='text/csv')
	        response.headers.set('Content-Disposition', 'attachment', filename=filename)

	        logger.info(f"User downloading file: {filename}")
	        return response
	    except Exception as e:
	        logger.error(f"Error downloading file: {str(e)}", exc_info=True)
	        flash(f'Lỗi khi tải file: {str(e)}', 'error')
	        return redirect(url_for('index'))

	if __name__ == '__main__':
	    # Run database initialization
	    init_app(app)

	    # Get port from environment variable or use 7860 as default
	    port = int(os.environ.get('PORT', 7860))

	    # The server listens on all interfaces, so the interactive debugger must
	    # not be enabled unconditionally: anyone who can reach it can run code on
	    # the host. Debug mode is opt-in through FLASK_DEBUG (1/true/yes/on).
	    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
	    app.run(host='0.0.0.0', port=port, debug=debug)