# emailsummarizer / app.py
# Author: jayaprakashgedela (Hugging Face Space)
# Commit: 74319fc (verified) — "Create app.py"
# app.py - Conceptual Backend for Email Summarizer using Flask and Hugging Face Transformers
from flask import Flask, request, jsonify
from transformers import pipeline
from flask_cors import CORS
import os # For environment variables, if needed
# The single Flask application object for this service.
app = Flask(__name__)
# Enable CORS for all routes. This is crucial to allow your frontend (e.g., a React app
# running on a different port or domain) to make requests to this backend.
CORS(app)
# --- Model Loading ---
# The summarization pipeline is loaded once at startup so every request reuses
# the same in-memory model instead of paying the load cost per call.
#
# NOTE(review): 'facebook/mbart-large-50-many-to-many-mmt' is primarily a
# multilingual *translation* model. The 'summarization' pipeline will adapt it,
# but a model fine-tuned for summarization (e.g. 'sshleifer/distilbart-cnn-12-6'
# or 'facebook/bart-large-cnn' for English) would generally yield better
# results. Kept as-is per the original author's choice.
MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"

# Remains None if loading fails; the /summarize endpoint checks this and
# answers 503 instead of crashing.
summarizer = None
try:
    print(f"Attempting to load summarization model '{MODEL_NAME}'...")
    # First run downloads the model weights, which may take a while.
    summarizer = pipeline("summarization", model=MODEL_NAME, tokenizer=MODEL_NAME)
    print(f"Model '{MODEL_NAME}' loaded successfully.")
except Exception as e:
    # Broad catch is deliberate here: any load failure (missing backend,
    # out-of-memory, network) should leave the app up but degraded.
    print(f"ERROR: Could not load summarization model. Please ensure you have 'torch' or 'tensorflow' installed and sufficient memory. Details: {e}")
# --- API Endpoint for Summarization ---
@app.route('/summarize', methods=['POST'])
def summarize_email():
    """
    Summarize an email thread posted as JSON.

    Expects a JSON payload:
        {"email_thread": "<text>", "language": "<optional, defaults to 'English'>"}

    Returns JSON:
        {"summary": "<text>"} with 200 on success, or {"error": "..."} with
        400 (bad input), 503 (model unavailable), or 500 (summarization failed).
    """
    # Model failed to load at startup -> service is degraded, not broken.
    if summarizer is None:
        return jsonify({"error": "Summarization service is not available. Model failed to load."}), 503

    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # silent=True returns None (instead of raising) on malformed JSON bodies,
    # so we can fall back to an empty dict and report 400 below.
    data = request.get_json(silent=True) or {}

    # `or ''` also covers an explicit JSON null, which the original
    # data.get('email_thread', '').strip() turned into an AttributeError (500).
    email_thread = (data.get('email_thread') or '').strip()
    target_language = data.get('language', 'English')  # Default to English

    if not email_thread:
        return jsonify({"error": "Email thread content is required for summarization."}), 400

    print(f"Received request to summarize in {target_language} for thread length: {len(email_thread)} characters.")
    try:
        # max_length/min_length bound the summary's token count;
        # do_sample=False keeps output deterministic.
        # NOTE(review): target_language is currently informational only.
        # mbart needs explicit src_lang/tgt_lang codes (e.g. "en_XX", "es_XX")
        # to control output language; a robust multilingual flow would
        # translate -> summarize -> translate. This implementation keeps it simple.
        summary_result = summarizer(
            email_thread,
            max_length=150,
            min_length=30,
            do_sample=False
        )
        # The pipeline returns a list of dicts; the summary text is under
        # 'summary_text' of the first (only) entry.
        summary = summary_result[0]['summary_text']
        print("Summarization successful.")
        return jsonify({"summary": summary}), 200
    except Exception as e:
        print(f"Error during summarization process: {e}")
        return jsonify({"error": f"An internal server error occurred during summarization: {str(e)}"}), 500
# --- Main Execution Block ---
if __name__ == '__main__':
    # Port is configurable via the PORT environment variable (default 5000),
    # as expected by most hosting platforms (Heroku, HF Spaces, etc.).
    port = int(os.environ.get('PORT', 5000))
    print(f"Starting Flask application on port {port}...")
    # Development server only. In production use a WSGI server such as
    # Gunicorn or uWSGI, and set debug=False — debug mode exposes an
    # interactive debugger and auto-reloads on code changes.
    # host='0.0.0.0' listens on all interfaces so the container is reachable.
    app.run(debug=True, host='0.0.0.0', port=port)