# emailsummarizer / app.py
# Author: jayaprakashgedela (Hugging Face Space)
# Commit: 74319fc (verified) — "Create app.py"
# app.py - Conceptual Backend for Email Summarizer using Flask and Hugging Face Transformers
from flask import Flask, request, jsonify
from transformers import pipeline
from flask_cors import CORS
import os # For environment variables, if needed
# The single Flask application object for this service.
app = Flask(__name__)
# Enable CORS for all routes. This is crucial to allow your frontend (e.g., a React app
# running on a different port or domain) to make requests to this backend.
CORS(app)
# --- Model Loading ---
# The summarization pipeline is loaded once at startup so every request reuses
# the same in-memory model instead of paying the load cost per call.
#
# NOTE(review): 'facebook/mbart-large-50-many-to-many-mmt' is primarily a
# multilingual *translation* model. The 'summarization' pipeline will adapt it,
# but a model fine-tuned for summarization (e.g. 'sshleifer/distilbart-cnn-12-6'
# or 'facebook/bart-large-cnn' for English) would generally yield better
# results. Kept as-is per the original author's choice.
MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"

# Remains None if loading fails; the /summarize endpoint checks this and
# answers 503 instead of crashing.
summarizer = None
try:
    print(f"Attempting to load summarization model '{MODEL_NAME}'...")
    # First run downloads the model weights, which may take a while.
    summarizer = pipeline("summarization", model=MODEL_NAME, tokenizer=MODEL_NAME)
    print(f"Model '{MODEL_NAME}' loaded successfully.")
except Exception as e:
    # Broad catch is deliberate here: any load failure (missing backend,
    # out-of-memory, network) should leave the app up but degraded.
    print(f"ERROR: Could not load summarization model. Please ensure you have 'torch' or 'tensorflow' installed and sufficient memory. Details: {e}")
# --- API Endpoint for Summarization ---
@app.route('/summarize', methods=['POST'])
def summarize_email():
    """
    Summarize an email thread posted as JSON.

    Expects a JSON payload:
        {"email_thread": "<text>", "language": "<optional, defaults to 'English'>"}

    Returns JSON:
        {"summary": "<text>"} with 200 on success, or {"error": "..."} with
        400 (bad input), 503 (model unavailable), or 500 (summarization failed).
    """
    # Model failed to load at startup -> service is degraded, not broken.
    if summarizer is None:
        return jsonify({"error": "Summarization service is not available. Model failed to load."}), 503

    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # silent=True returns None (instead of raising) on malformed JSON bodies,
    # so we can fall back to an empty dict and report 400 below.
    data = request.get_json(silent=True) or {}

    # `or ''` also covers an explicit JSON null, which the original
    # data.get('email_thread', '').strip() turned into an AttributeError (500).
    email_thread = (data.get('email_thread') or '').strip()
    target_language = data.get('language', 'English')  # Default to English

    if not email_thread:
        return jsonify({"error": "Email thread content is required for summarization."}), 400

    print(f"Received request to summarize in {target_language} for thread length: {len(email_thread)} characters.")
    try:
        # max_length/min_length bound the summary's token count;
        # do_sample=False keeps output deterministic.
        # NOTE(review): target_language is currently informational only.
        # mbart needs explicit src_lang/tgt_lang codes (e.g. "en_XX", "es_XX")
        # to control output language; a robust multilingual flow would
        # translate -> summarize -> translate. This implementation keeps it simple.
        summary_result = summarizer(
            email_thread,
            max_length=150,
            min_length=30,
            do_sample=False
        )
        # The pipeline returns a list of dicts; the summary text is under
        # 'summary_text' of the first (only) entry.
        summary = summary_result[0]['summary_text']
        print("Summarization successful.")
        return jsonify({"summary": summary}), 200
    except Exception as e:
        print(f"Error during summarization process: {e}")
        return jsonify({"error": f"An internal server error occurred during summarization: {str(e)}"}), 500
# --- Main Execution Block ---
if __name__ == '__main__':
    # Port is configurable via the PORT environment variable (default 5000),
    # as expected by most hosting platforms (Heroku, HF Spaces, etc.).
    port = int(os.environ.get('PORT', 5000))
    print(f"Starting Flask application on port {port}...")
    # Development server only. In production use a WSGI server such as
    # Gunicorn or uWSGI, and set debug=False — debug mode exposes an
    # interactive debugger and auto-reloads on code changes.
    # host='0.0.0.0' listens on all interfaces so the container is reachable.
    app.run(debug=True, host='0.0.0.0', port=port)