ViraSat / src /app.py
vanshs055's picture
Update src/app.py
677cb35 verified
# app.py (Full Corrected Code for Hugging Face Spaces)
import os  # Import 'os' to read environment variables
import random
from urllib.parse import quote, urljoin

import httpx
import streamlit as st
from bs4 import BeautifulSoup
from transformers import pipeline
# --- Page Configuration ---
# Set the browser-tab title and use Streamlit's centered page layout.
st.set_page_config(
    page_title="VirasaaT",
    layout="centered",
)
# --- Caching & Model Loading ---
@st.cache_resource
def load_summarizer(token: str):
    """
    Load the Hugging Face summarization pipeline (facebook/bart-large-cnn).

    The function is decorated with ``st.cache_resource``, so the pipeline is
    built once per distinct ``token`` value and the same object is then
    reused across Streamlit reruns and sessions instead of being rebuilt.

    Args:
        token: Hugging Face access token forwarded to ``pipeline``. When it
            is empty or ``None`` an error box is shown and the script run is
            halted via ``st.stop()``.

    Returns:
        The ``transformers`` summarization pipeline object.
    """
    # Validate first so the "Loading..." log line is only emitted when a
    # load is actually attempted.
    if not token:
        # This error is critical for deployment. It tells the user the secret is missing.
        st.error(
            "Hugging Face API token not found. Please set the 'HUGGINGFACE_TOKEN' secret in your Space settings.",
            # Must be a single valid emoji; the previous mis-encoded byte
            # sequence is not one and is rejected by Streamlit's icon check.
            icon="🔒",
        )
        st.stop()

    print("Loading AI summarization model...")
    # Use 'token' instead of the deprecated 'use_auth_token'.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", token=token)
    print("Model loaded successfully.")
    return summarizer
# --- Secret and Model Initialization (Hugging Face Spaces) ---
# The token is supplied through the process environment (Space secrets are
# exposed that way); the lookup yields None when the variable is not set.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Obtain the summarizer at script start-up. load_summarizer() halts the run
# with an on-page error if the token is missing.
summarizer = load_summarizer(HUGGINGFACE_TOKEN)
@st.cache_data(ttl=3600)  # Reuse each state's result for one hour
def get_recommendations(state: str):
    """
    Return up to 15 randomly ordered page titles from the English Wikipedia
    category "Culture of <state>".

    Titles containing "List of" are dropped. Any failure (network error,
    HTTP error status, unexpected payload) is logged to stdout and an empty
    list is returned instead of raising.
    """
    print(f"Fetching recommendations for {state}...")
    query_params = {
        "action": "query",
        "format": "json",
        "list": "categorymembers",
        "cmtitle": f"Category:Culture of {state}",
        "cmlimit": 50,
        "cmtype": "page",
    }
    try:
        with httpx.Client() as http:
            response = http.get("https://en.wikipedia.org/w/api.php", params=query_params)
            response.raise_for_status()
            payload = response.json()

        pages = payload.get("query", {}).get("categorymembers", [])
        if not pages:
            return []

        # Keep individual articles only; skip "List of ..." index pages.
        topics = []
        for page in pages:
            page_title = page["title"]
            if "List of" in page_title:
                continue
            topics.append(page_title)

        # Shuffle in place so the 15 suggestions are a random sample.
        random.shuffle(topics)
        return topics[:15]
    except Exception as e:
        print(f"Error fetching recommendations: {e}")
        return []
@st.cache_data(ttl=3600)
def get_wiki_summary_and_image(_summarizer, query: str):
    """
    Fetch a Wikipedia article, summarize its text and pick an image for it.

    Args:
        _summarizer: Summarization pipeline, called as
            ``_summarizer(text, max_length=..., min_length=..., ...)``.
            The leading underscore tells ``st.cache_data`` not to hash this
            argument, so results are cached on ``query`` alone.
        query: Article title to look up (redirects are followed).

    Returns:
        A dict with keys ``"title"``, ``"summary"``, ``"image"`` (absolute
        URL or ``None``) and ``"url"`` (link to the article), or ``None``
        when the article cannot be fetched, has no paragraph text, or
        summarization fails. Errors are logged to stdout, never raised.

    NOTE(review): a ``None`` result is a normal return value, so it is cached
    for the TTL like any other result.
    """
    print(f"Fetching and processing article for '{query}'...")
    params = {
        "action": "parse",
        "page": query,
        "format": "json",
        "prop": "text|images",
        "redirects": True,
    }
    try:
        with httpx.Client() as client:
            res = client.get("https://en.wikipedia.org/w/api.php", params=params)
            res.raise_for_status()
            data = res.json()

        if "error" in data:
            print(f"Wikipedia API error for query '{query}': {data['error']}")
            return None

        parse_data = data["parse"]
        title = parse_data["title"]
        html_content = parse_data["text"]["*"]
        soup = BeautifulSoup(html_content, "html.parser")

        # Prefer the infobox picture. Both the legacy ".image" wrapper class
        # and ".mw-file-description" (which newer MediaWiki markup appears to
        # use instead - TODO confirm) are accepted; otherwise fall back to
        # the first <img> anywhere in the article.
        img_tag = soup.select_one(
            ".infobox .image img, .infobox .mw-file-description img"
        ) or soup.find("img")
        src = img_tag.get("src") if img_tag else None
        # urljoin resolves protocol-relative ("//upload..."), root-relative
        # ("/static/...") and already-absolute sources alike; simply
        # prefixing "https:" is only correct for the first form.
        image_url = urljoin("https://en.wikipedia.org/", src) if src else None

        full_text = " ".join([p.get_text() for p in soup.find_all("p") if p.get_text()])
        if not full_text.strip():
            print(f"No text content found for '{query}'")
            return None

        # Cheap pre-trim by words. Words are not tokens, so this alone does
        # not guarantee the input fits the model; truncation=True below
        # makes the tokenizer enforce the real limit.
        truncated_text = " ".join(full_text.split()[:1024])
        print(f"Summarizing '{query}'...")
        summary_result = _summarizer(
            truncated_text,
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        print("Summarization complete.")

        # Percent-encode the title so characters such as "(", ")", "?" or "#"
        # neither break the URL nor the Markdown link it is rendered in.
        article_path = quote(title.replace(" ", "_"), safe="/:")
        return {
            "title": title,
            "summary": summary_result[0]["summary_text"],
            "image": image_url,
            "url": f"https://en.wikipedia.org/wiki/{article_path}",
        }
    except Exception as e:
        # Best-effort boundary: any failure is reported to the caller as None.
        print(f"Error in get_wiki_summary_and_image for query '{query}': {e}")
        return None
# --- Streamlit User Interface ---
st.title("🇮🇳 VirasaaT – AI-Powered Culture Explorer")
st.markdown("Discover the cultural richness of India with dynamic recommendations and AI-generated summaries.")

# The first entry is a placeholder meaning "nothing chosen yet".
states_list = [
    "Select a state", "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar",
    "Chhattisgarh", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
    "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra",
    "Manipur", "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab",
    "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana", "Tripura",
    "Uttar Pradesh", "Uttarakhand", "West Bengal",
]

# Step 1: choosing a state drives the list of recommended topics.
selected_state = st.selectbox("Step 1: Choose a State to Get Dynamic Recommendations", states_list)

# Step 2: topic picked from the recommendations ("" while nothing is picked).
selected_topic = ""
if selected_state != "Select a state":
    recommendations = get_recommendations(selected_state)
    if recommendations:
        display_recommendations = ["Select a topic"] + recommendations
        topic_choice = st.selectbox("Step 2: Select a Recommended Topic", display_recommendations)
        if topic_choice != "Select a topic":
            selected_topic = topic_choice
    else:
        st.warning(f"Could not find dynamic recommendations for {selected_state}. Please search manually below.")

manual_query = st.text_input("Or Enter Any Cultural Topic Manually", placeholder="e.g., Diwali, Yoga, Taj Mahal")

# Recompute the active query on every rerun from the current widget values.
# Only ever overwriting it (and never clearing it) left a stale topic behind
# after the user reset a widget or switched state. A non-blank manual entry
# takes precedence over the selected topic.
st.session_state.query = manual_query.strip() or selected_topic

if st.button("✨ Explore Culture", type="primary"):
    query_to_explore = st.session_state.query
    if not query_to_explore:
        st.warning("Please select a state and a topic, or enter a topic manually.")
    else:
        with st.spinner(f"AI is exploring '{query_to_explore}'..."):
            # Pass the globally loaded summarizer object into the function.
            article_data = get_wiki_summary_and_image(summarizer, query_to_explore)

        if article_data:
            st.subheader(article_data["title"])
            if article_data["image"]:
                st.image(article_data["image"], width=300, caption=article_data["title"])
            st.markdown("### 📖 AI-Generated Summary")
            st.write(article_data["summary"])
            st.markdown(f"**[🔗 Read Full Article on Wikipedia]({article_data['url']})**")
        else:
            st.error(f"Could not retrieve or summarize the article for '{query_to_explore}'. Please try another topic.")