Spaces:

kyserS09
/

summariser

Sleeping

App Files Files Community

kyserS09 committed on Feb 14, 2025

Commit

3d097bd

verified ·

1 Parent(s): bbb8ec0

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -20

app.py CHANGED Viewed

@@ -1,34 +1,125 @@
 import gradio as gr
-from transformers import LEDTokenizer, LEDForConditionalGeneration
-# Use Longformer Encoder-Decoder (LED) model
-model_name = "allenai/led-large-16384"
-tokenizer = LEDTokenizer.from_pretrained(model_name)
-model = LEDForConditionalGeneration.from_pretrained(model_name)
-def summarize_text(text):
-    # Tokenize input with truncation to fit within 16,384 tokens
-    inputs = tokenizer([text], max_length=16384, return_tensors="pt", truncation=True)
-    # Generate summary with adjusted parameters
-    summary_ids = model.generate(
-        inputs["input_ids"],
-        num_beams=4,
-        max_length=512,  # Can be adjusted based on summary size needs
-        min_length=100,
-        early_stopping=True
-    )
-    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-    return summary
 # Gradio Interface
 iface = gr.Interface(
     fn=summarize_text,
     inputs="text",
     outputs="text",
-    title="Longformer Summarizer",
-    description="Enter text to get a summary using the Longformer Encoder-Decoder."
 )
 if __name__ == "__main__":

 import gradio as gr
+import os
+import requests
+import torch
+from transformers import (
+    LEDTokenizer, LEDForConditionalGeneration,
+    BartTokenizer, BartForConditionalGeneration,
+    PegasusTokenizer, PegasusForConditionalGeneration,
+    AutoTokenizer, AutoModelForSeq2SeqLM
+)
+# OpenAI API key, read from the environment. It is only used by the ChatGPT
+# fallback; when it is unset, summarize_with_chatgpt returns an error string
+# instead of calling the API.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # Ensure this is set in your environment variables
+# List of models in priority order.
+# Each entry gives a checkpoint name plus the tokenizer and model classes whose
+# from_pretrained() is called with that name when the models are loaded below.
+MODELS = [
+    {
+        "name": "allenai/led-large-16384",
+        "tokenizer_class": LEDTokenizer,
+        "model_class": LEDForConditionalGeneration
+    },
+    {
+        "name": "facebook/bart-large-cnn",
+        "tokenizer_class": BartTokenizer,
+        "model_class": BartForConditionalGeneration
+    },
+    {
+        "name": "Falconsai/text_summarization",
+        "tokenizer_class": AutoTokenizer,
+        "model_class": AutoModelForSeq2SeqLM
+    },
+    {
+        "name": "google/pegasus-xsum",
+        "tokenizer_class": PegasusTokenizer,
+        "model_class": PegasusForConditionalGeneration
+    }
+]
+# Load models sequentially.
+# This runs at import time: every entry in MODELS is loaded in list order, so
+# loaded_models keeps the same priority order as MODELS. A model that fails to
+# load is reported with print() and skipped rather than raised, so the
+# remaining entries are still attempted.
+loaded_models = []
+for model_info in MODELS:
+    try:
+        tokenizer = model_info["tokenizer_class"].from_pretrained(model_info["name"])
+        model = model_info["model_class"].from_pretrained(model_info["name"])
+        # Keep the tokenizer and model together so they are always used as a pair.
+        loaded_models.append({"name": model_info["name"], "tokenizer": tokenizer, "model": model})
+        print(f"Loaded model: {model_info['name']}")
+    except Exception as e:
+        print(f"Failed to load {model_info['name']}: {e}")
+def summarize_with_transformers(text):
+    """
+    Try summarizing with locally loaded Transformer models in order of priority.
+
+    Args:
+        text: The document to summarize.
+
+    Returns:
+        The decoded summary from the first model that runs without raising,
+        or None when every loaded model failed (or none were loaded).
+    """
+    for model_data in loaded_models:
+        try:
+            tokenizer = model_data["tokenizer"]
+            model = model_data["model"]
+            # Truncate to what THIS model can encode. 16384 is only valid for
+            # LED; the other checkpoints accept far fewer positions, so using
+            # 16384 everywhere leaves long inputs untruncated and makes those
+            # models fail. The min() also guards against tokenizers that
+            # report a huge sentinel value for model_max_length.
+            max_input_length = min(tokenizer.model_max_length, 16384)
+            inputs = tokenizer(
+                [text],
+                max_length=max_input_length,
+                return_tensors="pt",
+                truncation=True
+            )
+            # Generate summary; pass the attention mask explicitly instead of
+            # letting generate() guess it from the input ids.
+            summary_ids = model.generate(
+                inputs["input_ids"],
+                attention_mask=inputs["attention_mask"],
+                num_beams=4,
+                max_length=512,
+                min_length=100,
+                early_stopping=True
+            )
+            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+            return summary  # Return the first successful response
+        except Exception as e:
+            # Best-effort chain: report the failure and try the next model.
+            print(f"Error using {model_data['name']}: {e}")
+    return None  # Indicate failure
+def summarize_with_chatgpt(text):
+    """
+    Fallback to OpenAI ChatGPT API if all other models fail.
+
+    Args:
+        text: The document to summarize.
+
+    Returns:
+        The summary text from the API on success. On any failure (missing
+        key, network error, non-200 status, unexpected response body) a
+        human-readable string starting with "Error:" is returned instead of
+        raising, because the result is shown directly in the UI.
+    """
+    if not OPENAI_API_KEY:
+        return "Error: No OpenAI API key provided."
+    headers = {
+        "Authorization": f"Bearer {OPENAI_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": "gpt-3.5-turbo",
+        "messages": [{"role": "user", "content": f"Summarize this article: {text}"}],
+        "max_tokens": 512
+    }
+    try:
+        # requests has no default timeout; without one a stalled connection
+        # would block this request handler forever.
+        response = requests.post(
+            "https://api.openai.com/v1/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=60
+        )
+    except requests.RequestException as e:
+        return f"Error: Failed to reach ChatGPT ({e})"
+    if response.status_code != 200:
+        return f"Error: Failed to summarize with ChatGPT (status {response.status_code})"
+    try:
+        return response.json()["choices"][0]["message"]["content"]
+    except (ValueError, KeyError, IndexError, TypeError) as e:
+        # A 200 with a body that is not the expected JSON shape.
+        return f"Error: Unexpected response from ChatGPT ({e})"
+def summarize_text(text):
+    """
+    Main function to summarize text, trying Transformer models first, then ChatGPT if needed.
+
+    Args:
+        text: The document to summarize, as entered in the UI.
+
+    Returns:
+        The summary string, or a string starting with "Error:" when the
+        input is blank or the ChatGPT fallback fails.
+    """
+    # Blank input has nothing to summarize; return early rather than running
+    # every local model and then spending an API call on it.
+    if not text or not text.strip():
+        return "Error: No input text provided."
+    summary = summarize_with_transformers(text)
+    if summary:
+        return summary  # Return successful summary from a Transformer model
+    print("All Transformer models failed. Falling back to ChatGPT...")
+    return summarize_with_chatgpt(text)  # Use ChatGPT as last resort
 # Gradio Interface
+# One text input and one text output; each submission is passed to
+# summarize_text and its return value is displayed as the result.
 iface = gr.Interface(
     fn=summarize_text,
     inputs="text",
     outputs="text",
+    title="Multi-Model Summarizer with Fallback",
+    description="Tries multiple models for summarization, falling back to ChatGPT if needed."
 )
 if __name__ == "__main__":