"""Minimal Flask app exposing a BART-large-CNN text-summarization endpoint."""

import os

from flask import Flask, jsonify, render_template, request
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

app = Flask(__name__)

# Load tokenizer/model once at import time so individual requests don't pay
# the multi-second model-loading cost.
MODEL_NAME = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def _clean_summary(summary: str) -> str:
    """Normalize raw model output into presentable prose.

    - Removes the stray space BART sometimes emits before punctuation.
    - Upper-cases only the first character of each sentence, leaving the
      rest untouched so proper nouns (e.g. "James Webb") keep their casing.
    - Ensures the text ends with a period.
    """
    # Fix punctuation spacing in one pass per punctuation mark.
    for punct in (".", ",", "!", "?"):
        summary = summary.replace(f" {punct}", punct)

    # Capitalize ONLY the first letter of each sentence; everything after
    # index 0 is kept exactly as the model wrote it.
    sentences = []
    for part in summary.split(". "):
        part = part.strip()
        if part:
            sentences.append(part[0].upper() + part[1:])
    summary = ". ".join(sentences)

    if summary and not summary.endswith("."):
        summary += "."
    return summary


@app.route('/')
def index():
    """Serve the single-page UI."""
    return render_template('index.html')


@app.route('/summarize', methods=['POST'])
def summarize():
    """Summarize the "text" field of a JSON POST body.

    Returns:
        200 with {"summary": ...} on success,
        400 for a non-JSON body or text shorter than 15 words,
        500 with the error message for unexpected failures.
    """
    try:
        # silent=True makes get_json return None instead of raising on a
        # non-JSON body, so malformed requests get a 400 — previously the
        # None.get(...) AttributeError surfaced as a misleading 500.
        data = request.get_json(silent=True)
        if data is None:
            return jsonify({'error': 'Request body must be JSON.'}), 400

        text = data.get('text', '').strip()
        if not text or len(text.split()) < 15:
            return jsonify({'error': 'Text too short.'}), 400

        inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
        summary_ids = model.generate(
            inputs["input_ids"],
            # Pass the mask the tokenizer already produced so padding (if
            # any) is never attended to, and to silence the HF warning.
            attention_mask=inputs["attention_mask"],
            max_length=300,
            min_length=150,
            length_penalty=2.5,
            num_beams=5,
            no_repeat_ngram_size=3,
            early_stopping=False
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        return jsonify({'summary': _clean_summary(summary)})
    except Exception as e:
        # Top-level boundary: report the failure rather than crash the worker.
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    # Bind to all interfaces; 7860 is the conventional Hugging Face Spaces port.
    app.run(host='0.0.0.0', port=7860)