Update src/app.py
Browse files- src/app.py +33 -27
src/app.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import httpx
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import random
|
| 5 |
from transformers import pipeline
|
|
|
|
| 6 |
|
| 7 |
# --- Page Configuration ---
|
| 8 |
st.set_page_config(page_title="VirasaaT", layout="centered")
|
|
@@ -10,29 +13,36 @@ st.set_page_config(page_title="VirasaaT", layout="centered")
|
|
| 10 |
# --- Caching & Model Loading ---
|
| 11 |
|
| 12 |
@st.cache_resource
|
| 13 |
-
def load_summarizer(token
|
| 14 |
"""
|
| 15 |
-
Loads the Hugging Face summarization model.
|
| 16 |
-
This is cached as a resource to be loaded only once.
|
| 17 |
"""
|
| 18 |
print("Loading AI summarization model...")
|
| 19 |
if not token:
|
| 20 |
-
# This error
|
| 21 |
-
st.error(
|
|
|
|
|
|
|
|
|
|
| 22 |
st.stop()
|
| 23 |
|
| 24 |
-
#
|
| 25 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", token=token)
|
| 26 |
print("Model loaded successfully.")
|
| 27 |
return summarizer
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
summarizer = load_summarizer(HUGGINGFACE_TOKEN)
|
| 33 |
|
| 34 |
|
| 35 |
-
@st.cache_data(ttl=3600) # Cache API data for 1 hour
|
| 36 |
def get_recommendations(state: str):
|
| 37 |
"""Fetches cultural topics from Wikipedia's category system."""
|
| 38 |
print(f"Fetching recommendations for {state}...")
|
|
@@ -53,9 +63,11 @@ def get_recommendations(state: str):
|
|
| 53 |
return []
|
| 54 |
|
| 55 |
@st.cache_data(ttl=3600)
|
| 56 |
-
# CHANGED: The function now accepts the summarizer object as an argument.
|
| 57 |
def get_wiki_summary_and_image(_summarizer, query: str):
|
| 58 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 59 |
print(f"Fetching and processing article for '{query}'...")
|
| 60 |
params = {"action": "parse", "page": query, "format": "json", "prop": "text|images", "redirects": True}
|
| 61 |
try:
|
|
@@ -64,7 +76,7 @@ def get_wiki_summary_and_image(_summarizer, query: str):
|
|
| 64 |
res.raise_for_status()
|
| 65 |
data = res.json()
|
| 66 |
|
| 67 |
-
if "error" in data:
|
| 68 |
print(f"Wikipedia API error for query '{query}': {data['error']}")
|
| 69 |
return None
|
| 70 |
|
|
@@ -73,18 +85,13 @@ def get_wiki_summary_and_image(_summarizer, query: str):
|
|
| 73 |
html_content = parse_data["text"]["*"]
|
| 74 |
soup = BeautifulSoup(html_content, "html.parser")
|
| 75 |
|
| 76 |
-
# Find image more robustly
|
| 77 |
-
|
| 78 |
-
if infobox_img:
|
| 79 |
-
img_tag = infobox_img
|
| 80 |
-
else:
|
| 81 |
-
img_tag = soup.find("img") # Fallback to first image
|
| 82 |
-
|
| 83 |
image_url = f"https:{img_tag['src']}" if img_tag and 'src' in img_tag.attrs else None
|
| 84 |
|
| 85 |
full_text = " ".join([p.get_text() for p in soup.find_all("p") if p.get_text()])
|
| 86 |
|
| 87 |
-
if not full_text.strip():
|
| 88 |
print(f"No text content found for '{query}'")
|
| 89 |
return None
|
| 90 |
|
|
@@ -107,6 +114,7 @@ def get_wiki_summary_and_image(_summarizer, query: str):
|
|
| 107 |
# --- Streamlit User Interface ---
|
| 108 |
st.title("🇮🇳 VirasaaT — AI-Powered Culture Explorer")
|
| 109 |
st.markdown("Discover the cultural richness of India with dynamic recommendations and AI-generated summaries.")
|
|
|
|
| 110 |
states_list = ["Select a state", "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana", "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"]
|
| 111 |
|
| 112 |
# Initialize session state for the search query
|
|
@@ -119,27 +127,25 @@ if selected_state != "Select a state":
|
|
| 119 |
recommendations = get_recommendations(selected_state)
|
| 120 |
if recommendations:
|
| 121 |
display_recommendations = ["Select a topic"] + recommendations
|
| 122 |
-
selected_topic = st.selectbox("Step 2: Select a Recommended Topic", display_recommendations
|
| 123 |
if selected_topic != "Select a topic":
|
| 124 |
st.session_state.query = selected_topic
|
| 125 |
else:
|
| 126 |
st.warning(f"Could not find dynamic recommendations for {selected_state}. Please search manually below.")
|
| 127 |
|
| 128 |
-
manual_query = st.text_input("Or Enter Any Cultural Topic Manually", placeholder="e.g., Diwali, Yoga, Taj Mahal"
|
| 129 |
|
| 130 |
if manual_query:
|
| 131 |
st.session_state.query = manual_query
|
| 132 |
|
| 133 |
if st.button("✨ Explore Culture", type="primary"):
|
| 134 |
-
# Use the query from session state, which is set by either the dropdown or the text box
|
| 135 |
query_to_explore = st.session_state.query
|
| 136 |
|
| 137 |
if not query_to_explore.strip():
|
| 138 |
st.warning("Please select a state and a topic, or enter a topic manually.")
|
| 139 |
else:
|
| 140 |
with st.spinner(f"AI is exploring '{query_to_explore}'..."):
|
| 141 |
-
#
|
| 142 |
-
# Note the leading underscore in _summarizer to avoid name clashes with the global variable.
|
| 143 |
article_data = get_wiki_summary_and_image(summarizer, query_to_explore)
|
| 144 |
|
| 145 |
if article_data:
|
|
@@ -150,4 +156,4 @@ if st.button("✨ Explore Culture", type="primary"):
|
|
| 150 |
st.write(article_data["summary"])
|
| 151 |
st.markdown(f"**[๐ Read Full Article on Wikipedia]({article_data['url']})**")
|
| 152 |
else:
|
| 153 |
-
st.error(f"Could not retrieve or summarize the article for '{query_to_explore}'.
|
|
|
|
| 1 |
+
# app.py (Full Corrected Code for Hugging Face Spaces)
|
| 2 |
+
|
| 3 |
import streamlit as st
|
| 4 |
import httpx
|
| 5 |
from bs4 import BeautifulSoup
|
| 6 |
import random
|
| 7 |
from transformers import pipeline
|
| 8 |
+
import os # Import 'os' to read environment variables
|
| 9 |
|
| 10 |
# --- Page Configuration ---
|
| 11 |
st.set_page_config(page_title="VirasaaT", layout="centered")
|
|
|
|
| 13 |
# --- Caching & Model Loading ---
|
| 14 |
|
| 15 |
@st.cache_resource
|
| 16 |
+
def load_summarizer(token: str):
|
| 17 |
"""
|
| 18 |
+
Loads the Hugging Face summarization model using a token.
|
| 19 |
+
Cached via st.cache_resource: the model is loaded once per distinct token and the same object is then reused across reruns and sessions.
|
| 20 |
"""
|
| 21 |
print("Loading AI summarization model...")
|
| 22 |
if not token:
|
| 23 |
+
# This error is critical for deployment. It tells the user the secret is missing.
|
| 24 |
+
st.error(
|
| 25 |
+
"Hugging Face API token not found. Please set the 'HUGGINGFACE_TOKEN' secret in your Space settings.",
|
| 26 |
+
icon="๐"
|
| 27 |
+
)
|
| 28 |
st.stop()
|
| 29 |
|
| 30 |
+
# Use 'token' instead of the deprecated 'use_auth_token'.
|
| 31 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", token=token)
|
| 32 |
print("Model loaded successfully.")
|
| 33 |
return summarizer
|
| 34 |
|
| 35 |
+
# --- Secret and Model Initialization (Corrected for Hugging Face Spaces) ---
|
| 36 |
+
|
| 37 |
+
# 1. Read the secret from environment variables provided by Hugging Face Spaces.
|
| 38 |
+
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
|
| 39 |
+
|
| 40 |
+
# 2. Load the model once using the token.
|
| 41 |
+
# This happens on app startup. The app will stop here if the token is not found.
|
| 42 |
summarizer = load_summarizer(HUGGINGFACE_TOKEN)
|
| 43 |
|
| 44 |
|
| 45 |
+
@st.cache_data(ttl=3600) # Cache Wikipedia API data for 1 hour
|
| 46 |
def get_recommendations(state: str):
|
| 47 |
"""Fetches cultural topics from Wikipedia's category system."""
|
| 48 |
print(f"Fetching recommendations for {state}...")
|
|
|
|
| 63 |
return []
|
| 64 |
|
| 65 |
@st.cache_data(ttl=3600)
|
|
|
|
| 66 |
def get_wiki_summary_and_image(_summarizer, query: str):
|
| 67 |
+
"""
|
| 68 |
+
Fetches, summarizes, and extracts an image for a Wikipedia article.
|
| 69 |
+
This function now ACCEPTS the summarizer object instead of loading it.
|
| 70 |
+
"""
|
| 71 |
print(f"Fetching and processing article for '{query}'...")
|
| 72 |
params = {"action": "parse", "page": query, "format": "json", "prop": "text|images", "redirects": True}
|
| 73 |
try:
|
|
|
|
| 76 |
res.raise_for_status()
|
| 77 |
data = res.json()
|
| 78 |
|
| 79 |
+
if "error" in data:
|
| 80 |
print(f"Wikipedia API error for query '{query}': {data['error']}")
|
| 81 |
return None
|
| 82 |
|
|
|
|
| 85 |
html_content = parse_data["text"]["*"]
|
| 86 |
soup = BeautifulSoup(html_content, "html.parser")
|
| 87 |
|
| 88 |
+
# Find image more robustly
|
| 89 |
+
img_tag = soup.select_one(".infobox .image img") or soup.find("img")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
image_url = f"https:{img_tag['src']}" if img_tag and 'src' in img_tag.attrs else None
|
| 91 |
|
| 92 |
full_text = " ".join([p.get_text() for p in soup.find_all("p") if p.get_text()])
|
| 93 |
|
| 94 |
+
if not full_text.strip():
|
| 95 |
print(f"No text content found for '{query}'")
|
| 96 |
return None
|
| 97 |
|
|
|
|
| 114 |
# --- Streamlit User Interface ---
|
| 115 |
st.title("🇮🇳 VirasaaT — AI-Powered Culture Explorer")
|
| 116 |
st.markdown("Discover the cultural richness of India with dynamic recommendations and AI-generated summaries.")
|
| 117 |
+
|
| 118 |
states_list = ["Select a state", "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana", "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"]
|
| 119 |
|
| 120 |
# Initialize session state for the search query
|
|
|
|
| 127 |
recommendations = get_recommendations(selected_state)
|
| 128 |
if recommendations:
|
| 129 |
display_recommendations = ["Select a topic"] + recommendations
|
| 130 |
+
selected_topic = st.selectbox("Step 2: Select a Recommended Topic", display_recommendations)
|
| 131 |
if selected_topic != "Select a topic":
|
| 132 |
st.session_state.query = selected_topic
|
| 133 |
else:
|
| 134 |
st.warning(f"Could not find dynamic recommendations for {selected_state}. Please search manually below.")
|
| 135 |
|
| 136 |
+
manual_query = st.text_input("Or Enter Any Cultural Topic Manually", placeholder="e.g., Diwali, Yoga, Taj Mahal")
|
| 137 |
|
| 138 |
if manual_query:
|
| 139 |
st.session_state.query = manual_query
|
| 140 |
|
| 141 |
if st.button("✨ Explore Culture", type="primary"):
|
|
|
|
| 142 |
query_to_explore = st.session_state.query
|
| 143 |
|
| 144 |
if not query_to_explore.strip():
|
| 145 |
st.warning("Please select a state and a topic, or enter a topic manually.")
|
| 146 |
else:
|
| 147 |
with st.spinner(f"AI is exploring '{query_to_explore}'..."):
|
| 148 |
+
# Pass the globally loaded summarizer object into the function.
|
|
|
|
| 149 |
article_data = get_wiki_summary_and_image(summarizer, query_to_explore)
|
| 150 |
|
| 151 |
if article_data:
|
|
|
|
| 156 |
st.write(article_data["summary"])
|
| 157 |
st.markdown(f"**[๐ Read Full Article on Wikipedia]({article_data['url']})**")
|
| 158 |
else:
|
| 159 |
+
st.error(f"Could not retrieve or summarize the article for '{query_to_explore}'. Please try another topic.")
|