Spaces:
Running
Running
| """ | |
| Smart Summarizer - Flask Web Application | |
| Professional UI matching Figma design | |
| """ | |
| from flask import Flask, render_template, request, jsonify | |
| import sys | |
| from pathlib import Path | |
| import os | |
| from werkzeug.utils import secure_filename | |
| import PyPDF2 | |
| from docx import Document as DocxDocument | |
| # Add project root to path | |
| sys.path.append(str(Path(__file__).parent.parent)) | |
| from models.textrank import TextRankSummarizer | |
| from models.bart import BARTSummarizer | |
| from models.pegasus import PEGASUSSummarizer | |
| app = Flask(__name__) | |
| app.config['SECRET_KEY'] = 'your-secret-key-here' | |
| app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size | |
| app.config['UPLOAD_FOLDER'] = 'uploads' | |
| # Create uploads folder if it doesn't exist | |
| os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True) | |
| # Allowed file extensions | |
| ALLOWED_EXTENSIONS = {'txt', 'md', 'text', 'pdf', 'docx', 'doc'} | |
| def allowed_file(filename): | |
| return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS | |
| # Initialize models (lazy loading) | |
| models = {} | |
| def get_model(model_name): | |
| """Load and cache models""" | |
| if model_name not in models: | |
| if model_name == "textrank": | |
| models[model_name] = TextRankSummarizer() | |
| elif model_name == "bart": | |
| models[model_name] = BARTSummarizer(device='cpu') | |
| elif model_name == "pegasus": | |
| models[model_name] = PEGASUSSummarizer(device='cpu') | |
| return models[model_name] | |
| def home(): | |
| """Home page""" | |
| return render_template('home.html') | |
| def single_summary(): | |
| """Single summary page""" | |
| return render_template('single_summary.html') | |
| def comparison(): | |
| """Model comparison page""" | |
| return render_template('comparison.html') | |
| def batch(): | |
| """Batch processing page""" | |
| return render_template('batch.html') | |
| def evaluation(): | |
| """Evaluation page""" | |
| return render_template('evaluation.html') | |
| def summarize(): | |
| """API endpoint for summarization""" | |
| try: | |
| data = request.json | |
| text = data.get('text', '') | |
| model_name = data.get('model', 'bart').lower() | |
| if not text or len(text.split()) < 10: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': 'Please provide at least 10 words of text' | |
| }), 400 | |
| # Get model | |
| model = get_model(model_name) | |
| # Calculate target summary length (approximately 20-25% of original) | |
| input_words = len(text.split()) | |
| target_length = max(30, min(150, int(input_words * 0.22))) # 22% compression | |
| # Generate summary based on model type | |
| if model_name == 'textrank': | |
| # For TextRank, calculate number of sentences to achieve similar compression | |
| sentences = text.count('.') + text.count('!') + text.count('?') | |
| num_sentences = max(2, int(sentences * 0.3)) # ~30% of sentences | |
| result = model.summarize_with_metrics(text, num_sentences=num_sentences) | |
| else: | |
| # For BART and PEGASUS, use word-based limits | |
| result = model.summarize_with_metrics( | |
| text, | |
| max_length=target_length, | |
| min_length=max(20, int(target_length * 0.5)) | |
| ) | |
| return jsonify({ | |
| 'success': True, | |
| 'summary': result['summary'], | |
| 'metadata': result['metadata'] | |
| }) | |
| except Exception as e: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': str(e) | |
| }), 500 | |
| def compare(): | |
| """API endpoint for comparing all three models""" | |
| try: | |
| data = request.json | |
| text = data.get('text', '') | |
| if not text or len(text.split()) < 10: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': 'Please provide at least 10 words of text' | |
| }), 400 | |
| results = {} | |
| # Calculate consistent target length for all models | |
| input_words = len(text.split()) | |
| target_length = max(30, min(150, int(input_words * 0.22))) | |
| sentences = text.count('.') + text.count('!') + text.count('?') | |
| num_sentences = max(2, int(sentences * 0.3)) | |
| # Run all three models | |
| for model_name in ['textrank', 'bart', 'pegasus']: | |
| try: | |
| model = get_model(model_name) | |
| if model_name == 'textrank': | |
| result = model.summarize_with_metrics(text, num_sentences=num_sentences) | |
| else: | |
| result = model.summarize_with_metrics( | |
| text, | |
| max_length=target_length, | |
| min_length=max(20, int(target_length * 0.5)) | |
| ) | |
| results[model_name] = { | |
| 'summary': result['summary'], | |
| 'metadata': result['metadata'] | |
| } | |
| except Exception as e: | |
| results[model_name] = { | |
| 'error': str(e) | |
| } | |
| return jsonify({ | |
| 'success': True, | |
| 'results': results | |
| }) | |
| except Exception as e: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': str(e) | |
| }), 500 | |
| def upload_file(): | |
| """API endpoint for file upload""" | |
| try: | |
| if 'file' not in request.files: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': 'No file provided' | |
| }), 400 | |
| file = request.files['file'] | |
| if file.filename == '': | |
| return jsonify({ | |
| 'success': False, | |
| 'error': 'No file selected' | |
| }), 400 | |
| if not allowed_file(file.filename): | |
| return jsonify({ | |
| 'success': False, | |
| 'error': 'Invalid file type. Please upload .txt, .md, .pdf, .docx, or .doc files' | |
| }), 400 | |
| # Extract text based on file type | |
| filename = secure_filename(file.filename) | |
| file_ext = filename.rsplit('.', 1)[1].lower() | |
| try: | |
| if file_ext in ['txt', 'md', 'text']: | |
| # Plain text files | |
| text = file.read().decode('utf-8') | |
| elif file_ext == 'pdf': | |
| # PDF files | |
| pdf_reader = PyPDF2.PdfReader(file) | |
| text = '' | |
| for page in pdf_reader.pages: | |
| text += page.extract_text() + '\n' | |
| elif file_ext in ['docx', 'doc']: | |
| # Word documents | |
| doc = DocxDocument(file) | |
| text = '\n'.join([paragraph.text for paragraph in doc.paragraphs]) | |
| else: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': 'Unsupported file format' | |
| }), 400 | |
| except UnicodeDecodeError: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': 'File encoding not supported. Please use UTF-8 encoded files' | |
| }), 400 | |
| except Exception as e: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': f'Error reading file: {str(e)}' | |
| }), 400 | |
| if not text or len(text.split()) < 10: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': 'File content is too short. Please provide at least 10 words' | |
| }), 400 | |
| return jsonify({ | |
| 'success': True, | |
| 'text': text, | |
| 'filename': filename, | |
| 'word_count': len(text.split()) | |
| }) | |
| except Exception as e: | |
| return jsonify({ | |
| 'success': False, | |
| 'error': str(e) | |
| }), 500 | |
| if __name__ == '__main__': | |
| import os | |
| # Get port from environment variable (Hugging Face Spaces uses 7860) | |
| port = int(os.environ.get('PORT', 7860)) | |
| # Check if running in production | |
| debug = os.environ.get('FLASK_ENV') != 'production' | |
| # Bind to 0.0.0.0 for cloud deployment | |
| app.run(host='0.0.0.0', port=port, debug=debug) | |