Spaces:

Rajak13
/

smart-summarizer

Running

App Files Files Community

smart-summarizer / webapp /app.py

Rajak13

Upload folder using huggingface_hub (#1)

634567d verified 23 days ago

raw

history blame contribute delete

8.56 kB

	"""
	Smart Summarizer - Flask Web Application
	Professional UI matching Figma design
	"""

	from flask import Flask, render_template, request, jsonify
	import sys
	from pathlib import Path
	import os
	from werkzeug.utils import secure_filename
	import PyPDF2
	from docx import Document as DocxDocument

	# Add project root to path
	sys.path.append(str(Path(__file__).parent.parent))

	from models.textrank import TextRankSummarizer
	from models.bart import BARTSummarizer
	from models.pegasus import PEGASUSSummarizer

	# Flask application object and its configuration.
	app = Flask(__name__)
	# Take the signing key from the environment so a real secret never lives in
	# source control. When SECRET_KEY is unset, fall back to a random per-process
	# key instead of a publicly known placeholder string.
	app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or os.urandom(32).hex()
	app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max file size
	app.config['UPLOAD_FOLDER'] = 'uploads'

	# Create uploads folder if it doesn't exist
	os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

	# Extensions (lowercase, without the dot) that allowed_file() accepts.
	ALLOWED_EXTENSIONS = {'txt', 'md', 'text', 'pdf', 'docx', 'doc'}

	def allowed_file(filename):
	    """Return True when *filename* has a dot and its final extension is allowed.

	    The extension is whatever follows the last dot, compared
	    case-insensitively against ALLOWED_EXTENSIONS.
	    """
	    _stem, dot, extension = filename.rpartition('.')
	    if not dot:
	        # No dot anywhere in the name, so there is no extension to check.
	        return False
	    return extension.lower() in ALLOWED_EXTENSIONS

	# Cache of already-constructed summarizers, keyed by model name.
	models = {}

	def get_model(model_name):
	    """Return the summarizer for *model_name*, constructing it on first use.

	    Args:
	        model_name: One of ``"textrank"``, ``"bart"`` or ``"pegasus"``.

	    Returns:
	        The cached summarizer instance for that name.

	    Raises:
	        ValueError: If *model_name* is not a known model. (Previously this
	            surfaced as a bare ``KeyError`` from the cache lookup.)
	    """
	    if model_name in models:
	        return models[model_name]

	    # Wrapped in lambdas so a summarizer class is only instantiated when
	    # that particular model is first requested.
	    factories = {
	        'textrank': lambda: TextRankSummarizer(),
	        'bart': lambda: BARTSummarizer(device='cpu'),
	        'pegasus': lambda: PEGASUSSummarizer(device='cpu'),
	    }
	    factory = factories.get(model_name)
	    if factory is None:
	        raise ValueError(
	            f"Unknown model '{model_name}'. "
	            f"Choose one of: {', '.join(sorted(factories))}"
	        )

	    models[model_name] = factory()
	    return models[model_name]

	@app.route('/')
	def home():
	    """Serve the site root by rendering the ``home.html`` template."""
	    page = render_template('home.html')
	    return page

	@app.route('/single-summary')
	def single_summary():
	    """Serve ``/single-summary`` by rendering ``single_summary.html``."""
	    page = render_template('single_summary.html')
	    return page

	@app.route('/comparison')
	def comparison():
	    """Serve ``/comparison`` by rendering ``comparison.html``."""
	    page = render_template('comparison.html')
	    return page

	@app.route('/batch')
	def batch():
	    """Serve ``/batch`` by rendering ``batch.html``."""
	    page = render_template('batch.html')
	    return page

	@app.route('/evaluation')
	def evaluation():
	    """Serve ``/evaluation`` by rendering ``evaluation.html``."""
	    page = render_template('evaluation.html')
	    return page

	@app.route('/api/summarize', methods=['POST'])
	def summarize():
	    """API endpoint: summarize the posted text with one model.

	    Expects a JSON object with a ``text`` string and an optional ``model``
	    string (``textrank``, ``bart`` or ``pegasus``; defaults to ``bart``).

	    Returns:
	        A JSON response. On success: ``success``, ``summary`` and
	        ``metadata``. On failure: ``success: False`` plus ``error``, with
	        status 400 for invalid input and 500 when the summarizer fails.
	    """
	    # silent=True yields None for a missing or malformed JSON body instead of
	    # raising, so client mistakes are reported as 400 rather than as a 500.
	    payload = request.get_json(silent=True)
	    if not isinstance(payload, dict):
	        return jsonify({
	            'success': False,
	            'error': 'Request body must be a JSON object'
	        }), 400

	    text = payload.get('text', '')
	    model_name = payload.get('model', 'bart')
	    if not isinstance(text, str) or not isinstance(model_name, str):
	        return jsonify({
	            'success': False,
	            'error': "'text' and 'model' must be strings"
	        }), 400

	    # Mirrors the model names get_model() knows how to construct; rejecting
	    # anything else here keeps a typo from turning into a server error.
	    model_name = model_name.lower()
	    if model_name not in ('textrank', 'bart', 'pegasus'):
	        return jsonify({
	            'success': False,
	            'error': f"Unknown model '{model_name}'. Use textrank, bart, or pegasus"
	        }), 400

	    words = text.split()
	    if len(words) < 10:
	        return jsonify({
	            'success': False,
	            'error': 'Please provide at least 10 words of text'
	        }), 400

	    try:
	        model = get_model(model_name)

	        if model_name == 'textrank':
	            # Approximate the sentence count from terminal punctuation and
	            # keep roughly 30% of the sentences, but never fewer than two.
	            sentences = text.count('.') + text.count('!') + text.count('?')
	            num_sentences = max(2, int(sentences * 0.3))
	            result = model.summarize_with_metrics(text, num_sentences=num_sentences)
	        else:
	            # Target about 22% of the input word count, clamped to 30..150.
	            target_length = max(30, min(150, int(len(words) * 0.22)))
	            result = model.summarize_with_metrics(
	                text,
	                max_length=target_length,
	                min_length=max(20, int(target_length * 0.5))
	            )

	        return jsonify({
	            'success': True,
	            'summary': result['summary'],
	            'metadata': result['metadata']
	        })

	    except Exception as e:
	        # Top-level boundary: record the traceback server-side, then report.
	        app.logger.exception('Summarization with model %s failed', model_name)
	        return jsonify({
	            'success': False,
	            'error': str(e)
	        }), 500

	@app.route('/api/compare', methods=['POST'])
	def compare():
	    """API endpoint: run the same text through all three models.

	    Expects a JSON object with a ``text`` string.

	    Returns:
	        A JSON response with ``success`` and ``results``, where ``results``
	        maps each model name to either ``summary``/``metadata`` or, when that
	        model failed, a single ``error`` string. Invalid input yields status
	        400; an unexpected failure outside the per-model loop yields 500.
	    """
	    # silent=True yields None for a missing or malformed JSON body instead of
	    # raising, so client mistakes are reported as 400 rather than as a 500.
	    payload = request.get_json(silent=True)
	    if not isinstance(payload, dict):
	        return jsonify({
	            'success': False,
	            'error': 'Request body must be a JSON object'
	        }), 400

	    text = payload.get('text', '')
	    if not isinstance(text, str):
	        return jsonify({
	            'success': False,
	            'error': "'text' must be a string"
	        }), 400

	    words = text.split()
	    if len(words) < 10:
	        return jsonify({
	            'success': False,
	            'error': 'Please provide at least 10 words of text'
	        }), 400

	    try:
	        results = {}

	        # Compute the length targets once so every model gets the same ones:
	        # about 22% of the input words (clamped to 30..150) and about 30% of
	        # the punctuation-estimated sentence count (at least two).
	        target_length = max(30, min(150, int(len(words) * 0.22)))
	        sentences = text.count('.') + text.count('!') + text.count('?')
	        num_sentences = max(2, int(sentences * 0.3))

	        for model_name in ['textrank', 'bart', 'pegasus']:
	            # Best effort per model: one failing model must not hide the
	            # results of the others, so its error is reported in place.
	            try:
	                model = get_model(model_name)

	                if model_name == 'textrank':
	                    result = model.summarize_with_metrics(text, num_sentences=num_sentences)
	                else:
	                    result = model.summarize_with_metrics(
	                        text,
	                        max_length=target_length,
	                        min_length=max(20, int(target_length * 0.5))
	                    )

	                results[model_name] = {
	                    'summary': result['summary'],
	                    'metadata': result['metadata']
	                }
	            except Exception as e:
	                app.logger.exception('Comparison run failed for model %s', model_name)
	                results[model_name] = {
	                    'error': str(e)
	                }

	        return jsonify({
	            'success': True,
	            'results': results
	        })

	    except Exception as e:
	        # Top-level boundary: record the traceback server-side, then report.
	        app.logger.exception('Model comparison failed')
	        return jsonify({
	            'success': False,
	            'error': str(e)
	        }), 500

	@app.route('/api/upload', methods=['POST'])
	def upload_file():
	    """API endpoint: extract plain text from an uploaded document.

	    Expects a multipart form upload under the field name ``file``.

	    Returns:
	        A JSON response. On success: ``success``, ``text``, ``filename`` and
	        ``word_count``. On failure: ``success: False`` plus ``error``, with
	        status 400 for a missing, disallowed, unreadable or too-short file,
	        the original HTTP status for request-level errors, and 500 for
	        anything unexpected.
	    """
	    # Local import: werkzeug is already a dependency of this file.
	    from werkzeug.exceptions import HTTPException

	    try:
	        if 'file' not in request.files:
	            return jsonify({
	                'success': False,
	                'error': 'No file provided'
	            }), 400

	        file = request.files['file']

	        if file.filename == '':
	            return jsonify({
	                'success': False,
	                'error': 'No file selected'
	            }), 400

	        if not allowed_file(file.filename):
	            return jsonify({
	                'success': False,
	                'error': 'Invalid file type. Please upload .txt, .md, .pdf, .docx, or .doc files'
	            }), 400

	        # Take the extension from the ORIGINAL name, which allowed_file() has
	        # just confirmed contains a dot. secure_filename() can strip the dot
	        # (e.g. ".pdf" or a non-ASCII stem), which used to raise IndexError.
	        file_ext = file.filename.rsplit('.', 1)[1].lower()
	        filename = secure_filename(file.filename)
	        if not filename.lower().endswith('.' + file_ext):
	            filename = f'upload.{file_ext}'

	        try:
	            if file_ext in ['txt', 'md', 'text']:
	                # utf-8-sig also accepts plain UTF-8 and drops a leading BOM.
	                text = file.read().decode('utf-8-sig')

	            elif file_ext == 'pdf':
	                pdf_reader = PyPDF2.PdfReader(file)
	                # Guard against a page yielding no text (None) so it cannot
	                # break the concatenation; each page still ends in a newline.
	                text = ''.join(
	                    (page.extract_text() or '') + '\n'
	                    for page in pdf_reader.pages
	                )

	            elif file_ext in ['docx', 'doc']:
	                # NOTE(review): python-docx presumably only parses the .docx
	                # format; a legacy binary .doc would land in the generic
	                # "Error reading file" handler below. Confirm.
	                doc = DocxDocument(file)
	                text = '\n'.join([paragraph.text for paragraph in doc.paragraphs])

	            else:
	                return jsonify({
	                    'success': False,
	                    'error': 'Unsupported file format'
	                }), 400

	        except UnicodeDecodeError:
	            return jsonify({
	                'success': False,
	                'error': 'File encoding not supported. Please use UTF-8 encoded files'
	            }), 400
	        except Exception as e:
	            return jsonify({
	                'success': False,
	                'error': f'Error reading file: {str(e)}'
	            }), 400

	        word_count = len(text.split())
	        if word_count < 10:
	            return jsonify({
	                'success': False,
	                'error': 'File content is too short. Please provide at least 10 words'
	            }), 400

	        return jsonify({
	            'success': True,
	            'text': text,
	            'filename': filename,
	            'word_count': word_count
	        })

	    except HTTPException as e:
	        # Request-level errors (e.g. a body larger than MAX_CONTENT_LENGTH)
	        # keep their own status code instead of being reported as 500.
	        return jsonify({
	            'success': False,
	            'error': e.description
	        }), e.code or 500
	    except Exception as e:
	        # Top-level boundary: record the traceback server-side, then report.
	        app.logger.exception('File upload failed')
	        return jsonify({
	            'success': False,
	            'error': str(e)
	        }), 500

	if __name__ == '__main__':
	    # Get port from environment variable (Hugging Face Spaces uses 7860)
	    port = int(os.environ.get('PORT', 7860))

	    # Debug mode is opt-in. The server binds to every interface below, and
	    # the interactive debugger lets a remote client run arbitrary code, so it
	    # must not be on merely because FLASK_ENV was left unset.
	    debug = (
	        os.environ.get('FLASK_ENV') == 'development'
	        or os.environ.get('FLASK_DEBUG') == '1'
	    )

	    # Bind to 0.0.0.0 for cloud deployment
	    app.run(host='0.0.0.0', port=port, debug=debug)