Spaces:
Build error
Build error
Effah Kofi Boakye Yiadom
committed on
Upload 3 files
Browse files- requirements.txt +2 -0
- streamlit.py +20 -0
- summarize.py +72 -0
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai
|
| 2 |
+
streamlit
|
streamlit.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from summarize import load_text, summarize_text
|
| 3 |
+
|
| 4 |
+
# Streamlit app: ask for a URL, fetch the page with load_text() and show the
# summary produced by summarize_text().
# NOTE(review): this script is saved as streamlit.py, the same name as the
# `streamlit` package it imports. Consider renaming it (e.g. app.py) so that
# `import streamlit` can never resolve to this file -- TODO confirm how the
# Space launches the app.
st.title("Text Summarizer with Hugging Face")

# User input for URL. Surrounding whitespace is dropped so that a blank or
# padded entry is not passed on to the HTTP request.
url = (st.text_input("Enter the URL of the article or blog post:") or "").strip()

if st.button("Summarize"):
    # Only http(s) URLs can be fetched; reject anything else up front instead
    # of letting the request fail and stacking two error messages.
    if not url.lower().startswith(("http://", "https://")):
        st.error("Please enter a valid URL.")
    else:
        text = load_text(url)
        if text:
            summary = summarize_text(text)
            st.subheader("Summary:")
            st.write(summary["output_text"])
        else:
            st.error("Failed to load text from the URL.")
|
summarize.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import streamlit as st
|
| 4 |
+
import together
|
| 5 |
+
import requests
|
| 6 |
+
|
| 7 |
+
# ✅ Load the Together API key from the TOGETHERAI_API_KEY environment variable
# and hand it to the Together client.
# NOTE: the key must never be written into the source file. Configure it as a
# secret / environment variable of the deployment instead. Any key that has
# ever been committed to version control should be revoked and replaced.
# If the variable is unset the value is None and API calls will fail with an
# authentication error, which summarize_text() reports to the user.
together.api_key = os.getenv("TOGETHERAI_API_KEY")
|
| 12 |
+
|
| 13 |
+
# ✅ Function to load text from a URL
def load_text(url, timeout=10, max_chars=20000):
    """Fetch ``url`` over HTTP and return the start of the response body.

    The body is returned as decoded by ``requests`` (``response.text``); no
    markup is stripped here.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the app indefinitely.
        max_chars: Maximum number of characters of the body to return.

    Returns:
        The first ``max_chars`` characters of the body, or ``None`` if the
        request failed (the error is shown to the user via ``st.error``).
    """
    try:
        # The User-Agent can be overridden through the USER_AGENT env variable.
        headers = {"User-Agent": os.getenv("USER_AGENT", "Mozilla/5.0")}
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx answers as failures rather than summarizing error pages.
        response.raise_for_status()
        return response.text[:max_chars]
    except Exception as e:
        # UI boundary: report any failure to the user instead of crashing.
        st.error(f"Error loading URL: {e}")
        return None
|
| 24 |
+
|
| 25 |
+
def _extract_summary(response):
    """Pull the generated text out of a completion response dict.

    Accepts three layouts: ``{"output": "<text>"}``,
    ``{"output": {"choices": [{"text": ...}]}}`` and
    ``{"choices": [{"text": ...}]}``.

    Raises:
        KeyError: If ``response`` matches none of these layouts.
    """
    if isinstance(response, dict):
        output = response.get("output")
        if isinstance(output, str):
            return output.strip()
        # "output" may itself be a dict wrapping the choices list, so look
        # there first and fall back to a top-level "choices" key.
        for container in (output, response):
            if isinstance(container, dict) and container.get("choices"):
                return container["choices"][0]["text"].strip()
    raise KeyError("Unexpected API response format.")


# ✅ Function to summarize text with richer, more detailed output
def summarize_text(text, max_retries=3, retry_delay=5):
    """Summarize ``text`` with the Together AI completion API.

    Args:
        text: Text to summarize; only the first 20,000 characters are sent.
        max_retries: Maximum number of API attempts. Only rate-limit errors
            are retried.
        retry_delay: Seconds to sleep between rate-limited attempts.

    Returns:
        A dict ``{"output_text": <str>}`` holding either the summary or a
        failure message. Errors are reported through Streamlit and never
        raised to the caller.
    """
    text = text[:20000]  # Cap the prompt size sent to the model

    # Prompt asking for a detailed, comprehensive summary
    prompt = f"""
You are an expert summarizer tasked with creating a highly detailed, comprehensive, and well-structured summary of the url provided.
Provide a summary in 20-35 sentences that thoroughly captures the main points, key details, significant insights, and important examples or arguments presented in the text.
Ensure the summary is informative, coherent, and rich in content, avoiding vague or overly simplistic statements.
Include context where relevant and aim to give a complete picture of the text’s purpose and findings.

TEXT: {text}

SUMMARY:
"""

    for attempt in range(max_retries):
        try:
            response = together.Complete.create(
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                prompt=prompt,
                max_tokens=600,  # Upper bound on the length of the generated summary
                temperature=0.3,  # Kept low for focus and coherence
            )

            # Debugging: Print full API response (optional, remove in production)
            print("Raw API Response:", response)

            return {"output_text": _extract_summary(response)}

        except Exception as e:
            is_last_attempt = attempt >= max_retries - 1
            if "rate_limit_exceeded" in str(e) and not is_last_attempt:
                st.warning(f"Attempt {attempt + 1}/{max_retries}: Rate limit exceeded. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                # Any other error, or a rate limit on the final attempt, is final.
                st.error(f"❌ Error during summarization: {e}")
                return {"output_text": "Summarization failed. Try again later."}

    # Reached only when the loop body never ran (max_retries <= 0).
    return {"output_text": "Summarization failed after multiple attempts."}
|