""" Smart Summarizer - Flask Web Application Professional UI matching Figma design """ from flask import Flask, render_template, request, jsonify import sys from pathlib import Path import os from werkzeug.utils import secure_filename import PyPDF2 from docx import Document as DocxDocument # Add project root to path sys.path.append(str(Path(__file__).parent.parent)) from models.textrank import TextRankSummarizer from models.bart import BARTSummarizer from models.pegasus import PEGASUSSummarizer app = Flask(__name__) app.config['SECRET_KEY'] = 'your-secret-key-here' app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size app.config['UPLOAD_FOLDER'] = 'uploads' # Create uploads folder if it doesn't exist os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True) # Allowed file extensions ALLOWED_EXTENSIONS = {'txt', 'md', 'text', 'pdf', 'docx', 'doc'} def allowed_file(filename): return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS # Initialize models (lazy loading) models = {} def get_model(model_name): """Load and cache models""" if model_name not in models: if model_name == "textrank": models[model_name] = TextRankSummarizer() elif model_name == "bart": models[model_name] = BARTSummarizer(device='cpu') elif model_name == "pegasus": models[model_name] = PEGASUSSummarizer(device='cpu') return models[model_name] @app.route('/') def home(): """Home page""" return render_template('home.html') @app.route('/single-summary') def single_summary(): """Single summary page""" return render_template('single_summary.html') @app.route('/comparison') def comparison(): """Model comparison page""" return render_template('comparison.html') @app.route('/batch') def batch(): """Batch processing page""" return render_template('batch.html') @app.route('/evaluation') def evaluation(): """Evaluation page""" return render_template('evaluation.html') @app.route('/api/summarize', methods=['POST']) def summarize(): """API endpoint for summarization""" try: data = request.json text = data.get('text', '') model_name = data.get('model', 'bart').lower() if not text or len(text.split()) < 10: return jsonify({ 'success': False, 'error': 'Please provide at least 10 words of text' }), 400 # Get model model = get_model(model_name) # Calculate target summary length (approximately 20-25% of original) input_words = len(text.split()) target_length = max(30, min(150, int(input_words * 0.22))) # 22% compression # Generate summary based on model type if model_name == 'textrank': # For TextRank, calculate number of sentences to achieve similar compression sentences = text.count('.') + text.count('!') + text.count('?') num_sentences = max(2, int(sentences * 0.3)) # ~30% of sentences result = model.summarize_with_metrics(text, num_sentences=num_sentences) else: # For BART and PEGASUS, use word-based limits result = model.summarize_with_metrics( text, max_length=target_length, min_length=max(20, int(target_length * 0.5)) ) return jsonify({ 'success': True, 'summary': result['summary'], 'metadata': result['metadata'] }) except Exception as e: return jsonify({ 'success': False, 'error': str(e) }), 500 @app.route('/api/compare', methods=['POST']) def compare(): """API endpoint for comparing all three models""" try: data = request.json text = data.get('text', '') if not text or len(text.split()) < 10: return jsonify({ 'success': False, 'error': 'Please provide at least 10 words of text' }), 400 results = {} # Calculate consistent target length for all models input_words = len(text.split()) target_length = max(30, min(150, int(input_words * 0.22))) sentences = text.count('.') + text.count('!') + text.count('?') num_sentences = max(2, int(sentences * 0.3)) # Run all three models for model_name in ['textrank', 'bart', 'pegasus']: try: model = get_model(model_name) if model_name == 'textrank': result = model.summarize_with_metrics(text, num_sentences=num_sentences) else: result = model.summarize_with_metrics( text, max_length=target_length, min_length=max(20, int(target_length * 0.5)) ) results[model_name] = { 'summary': result['summary'], 'metadata': result['metadata'] } except Exception as e: results[model_name] = { 'error': str(e) } return jsonify({ 'success': True, 'results': results }) except Exception as e: return jsonify({ 'success': False, 'error': str(e) }), 500 @app.route('/api/upload', methods=['POST']) def upload_file(): """API endpoint for file upload""" try: if 'file' not in request.files: return jsonify({ 'success': False, 'error': 'No file provided' }), 400 file = request.files['file'] if file.filename == '': return jsonify({ 'success': False, 'error': 'No file selected' }), 400 if not allowed_file(file.filename): return jsonify({ 'success': False, 'error': 'Invalid file type. Please upload .txt, .md, .pdf, .docx, or .doc files' }), 400 # Extract text based on file type filename = secure_filename(file.filename) file_ext = filename.rsplit('.', 1)[1].lower() try: if file_ext in ['txt', 'md', 'text']: # Plain text files text = file.read().decode('utf-8') elif file_ext == 'pdf': # PDF files pdf_reader = PyPDF2.PdfReader(file) text = '' for page in pdf_reader.pages: text += page.extract_text() + '\n' elif file_ext in ['docx', 'doc']: # Word documents doc = DocxDocument(file) text = '\n'.join([paragraph.text for paragraph in doc.paragraphs]) else: return jsonify({ 'success': False, 'error': 'Unsupported file format' }), 400 except UnicodeDecodeError: return jsonify({ 'success': False, 'error': 'File encoding not supported. Please use UTF-8 encoded files' }), 400 except Exception as e: return jsonify({ 'success': False, 'error': f'Error reading file: {str(e)}' }), 400 if not text or len(text.split()) < 10: return jsonify({ 'success': False, 'error': 'File content is too short. Please provide at least 10 words' }), 400 return jsonify({ 'success': True, 'text': text, 'filename': filename, 'word_count': len(text.split()) }) except Exception as e: return jsonify({ 'success': False, 'error': str(e) }), 500 if __name__ == '__main__': import os # Get port from environment variable (Hugging Face Spaces uses 7860) port = int(os.environ.get('PORT', 7860)) # Check if running in production debug = os.environ.get('FLASK_ENV') != 'production' # Bind to 0.0.0.0 for cloud deployment app.run(host='0.0.0.0', port=port, debug=debug)