# NOTE(review): the three lines that were here ("Spaces:", "Runtime error" x2)
# are extraction/paste residue, not program text; preserved as a comment so the
# file stays syntactically valid.
# app.py - Conceptual Backend for Email Summarizer using Flask and Hugging Face Transformers

import os  # For environment variables, if needed

from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import pipeline

# WSGI application object for this service.
app = Flask(__name__)

# Enable CORS for all routes. This is crucial to allow your frontend (e.g., a React app
# running on a different port or domain) to make requests to this backend.
CORS(app)
# --- Model Loading ---
# The pre-trained summarization model is loaded once at application start so it
# is not re-initialized on every request.
#
# 'facebook/mbart-large-50-many-to-many-mmt' is a large multilingual
# sequence-to-sequence model whose primary strength is translation. It can be
# driven through a summarization pipeline, but a model fine-tuned specifically
# for multilingual summarization would generally yield better results. For
# English-only summarization, 'sshleifer/distilbart-cnn-12-6' or
# 'facebook/bart-large-cnn' are common choices.

# Single source of truth for the model identifier (was previously repeated in
# three string literals, which is easy to let drift when swapping models).
MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"

# Initialize summarizer pipeline. This might take a while on first run
# as it downloads the model weights. If loading fails, `summarizer` stays
# None and the API endpoint reports 503.
summarizer = None
try:
    print(f"Attempting to load summarization model '{MODEL_NAME}'...")
    # The 'summarization' pipeline adapts the model for summarization. For
    # mbart, a translate-then-summarize flow would need explicit source/target
    # languages; for simplicity we let the pipeline handle it.
    summarizer = pipeline("summarization", model=MODEL_NAME, tokenizer=MODEL_NAME)
    print(f"Model '{MODEL_NAME}' loaded successfully.")
except Exception as e:
    print(f"ERROR: Could not load summarization model. Please ensure you have 'torch' or 'tensorflow' installed and sufficient memory. Details: {e}")
# --- API Endpoint for Summarization ---
# BUG FIX: the function below was documented as an API endpoint but was never
# registered with Flask (no @app.route decorator), so POSTing to the backend
# returned 404. Registering it at /summarize makes the endpoint reachable.
@app.route('/summarize', methods=['POST'])
def summarize_email():
    """
    Handle POST /summarize requests to summarize an email thread.

    Expects a JSON payload with:
        email_thread (str): required; the raw email thread text.
        language (str): optional; desired summary language, defaults to
            'English'. NOTE(review): the pipeline call below does not actually
            use this value — the summary comes back in the model's default
            output language. A translate-summarize-translate flow would be
            needed for true multilingual output.

    Returns:
        JSON {"summary": str} with 200 on success, or {"error": str} with
        400 (bad input), 503 (model unavailable), or 500 (pipeline failure).
    """
    # Model failed to load at startup -> service is unusable.
    if summarizer is None:
        return jsonify({"error": "Summarization service is not available. Model failed to load."}), 503  # Service Unavailable

    # Ensure the request content type is JSON.
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    data = request.get_json()

    # Extract email thread and target language from the request payload.
    email_thread = data.get('email_thread', '').strip()
    target_language = data.get('language', 'English')  # Default to English if not provided

    # Basic input validation: empty/whitespace-only threads are rejected.
    if not email_thread:
        return jsonify({"error": "Email thread content is required for summarization."}), 400

    print(f"Received request to summarize in {target_language} for thread length: {len(email_thread)} characters.")

    try:
        # max_length/min_length bound the summary's token count;
        # do_sample=False keeps the output deterministic.
        # For mbart, a translation-aware flow would pass src_lang/tgt_lang
        # (e.g. src_lang="en_XX", tgt_lang="es_XX") and map 'English',
        # 'Spanish', etc. to mbart language codes; this example keeps it simple.
        summary_result = summarizer(
            email_thread,
            max_length=150,  # Max tokens in the summary
            min_length=30,   # Min tokens in the summary
            do_sample=False  # For more consistent results
        )
        # The pipeline returns a list of dicts; take the first result's text.
        summary = summary_result[0]['summary_text']
        print("Summarization successful.")
        return jsonify({"summary": summary}), 200
    except Exception as e:
        print(f"Error during summarization process: {e}")
        return jsonify({"error": f"An internal server error occurred during summarization: {str(e)}"}), 500
# --- Main Execution Block ---
if __name__ == '__main__':
    # Honor the PORT environment variable; fall back to 5000 for local runs.
    port = int(os.environ.get('PORT', 5000))
    print(f"Starting Flask application on port {port}...")
    # This starts the Flask development server only. In production, front the
    # app with a real WSGI server (Gunicorn, uWSGI) and set debug=False —
    # debug mode exposes the interactive debugger and reloads on code changes.
    app.run(debug=True, host='0.0.0.0', port=port)  # Listen on all public IPs