Spaces:

vanshs055
/

ViraSat

Sleeping

App Files Files Community

ViraSat / src /app.py

vanshs055

Update src/app.py

677cb35 verified 7 months ago

raw

history blame contribute delete

7.08 kB

	# app.py (Full Corrected Code for Hugging Face Spaces)

	import streamlit as st
	import httpx
	from bs4 import BeautifulSoup
	import random
	from transformers import pipeline
	import os # Import 'os' to read environment variables

	# --- Page Configuration ---
	# Sets the browser-tab title and uses Streamlit's "centered" (fixed-width)
	# layout. NOTE(review): kept ahead of every other `st.*` call — confirm the
	# deployed Streamlit version still expects page config to come first.
	st.set_page_config(page_title="VirasaaT", layout="centered")

	# --- Caching & Model Loading ---

	@st.cache_resource
	def load_summarizer(token: str):
	    """
	    Build the BART summarization pipeline, authenticating with ``token``.

	    The function is wrapped in ``st.cache_resource``, so the pipeline object
	    is created once per distinct ``token`` value and reused afterwards
	    instead of being rebuilt on every script rerun.

	    Args:
	        token: Hugging Face access token. When it is empty or ``None`` an
	            error is rendered in the app and ``st.stop()`` is called.

	    Returns:
	        The ``transformers`` summarization pipeline for
	        ``facebook/bart-large-cnn``.
	    """
	    print("Loading AI summarization model...")

	    if not token:
	        # Without the secret the deployment cannot work; surface that clearly
	        # in the UI and halt the script.
	        missing_token_message = (
	            "Hugging Face API token not found. Please set the 'HUGGINGFACE_TOKEN' secret in your Space settings."
	        )
	        st.error(missing_token_message, icon="🔒")
	        st.stop()

	    # `token=` is the current keyword; `use_auth_token=` is deprecated.
	    model = pipeline(
	        "summarization",
	        model="facebook/bart-large-cnn",
	        token=token,
	    )
	    print("Model loaded successfully.")
	    return model

	# --- Secret and Model Initialization (Corrected for Hugging Face Spaces) ---

	# Hugging Face Spaces exposes configured secrets as environment variables;
	# the value is None when the secret has not been set.
	HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

	# Build (or fetch from cache) the summarizer during script start-up.
	# load_summarizer() reports the problem and stops the app if the token is missing.
	summarizer = load_summarizer(HUGGINGFACE_TOKEN)


	@st.cache_data(ttl=3600)  # Keep Wikipedia API results for one hour
	def get_recommendations(state: str):
	    """
	    Return up to 15 randomly ordered article titles from Wikipedia's
	    "Category:Culture of <state>" category.

	    Titles containing "List of" are dropped. Any failure (network error,
	    bad status, unexpected payload) is logged to stdout and yields ``[]``.

	    Args:
	        state: State name interpolated into the category title.

	    Returns:
	        A list of at most 15 page titles; empty when nothing is found or
	        the request fails.
	    """
	    print(f"Fetching recommendations for {state}...")
	    query_params = {
	        "action": "query",
	        "format": "json",
	        "list": "categorymembers",
	        "cmtitle": f"Category:Culture of {state}",
	        "cmlimit": 50,
	        "cmtype": "page",
	    }
	    try:
	        with httpx.Client() as http:
	            response = http.get("https://en.wikipedia.org/w/api.php", params=query_params)
	            response.raise_for_status()
	            payload = response.json()

	        pages = payload.get("query", {}).get("categorymembers", [])
	        if not pages:
	            return []

	        # Skip index-style pages such as "List of ..." articles.
	        candidates = []
	        for page in pages:
	            page_title = page["title"]
	            if "List of" not in page_title:
	                candidates.append(page_title)

	        random.shuffle(candidates)
	        return candidates[:15]
	    except Exception as err:
	        print(f"Error fetching recommendations: {err}")
	        return []

	@st.cache_data(ttl=3600)
	def get_wiki_summary_and_image(_summarizer, query: str):
	    """
	    Fetch a Wikipedia article, summarize its text and pick a lead image.

	    The summarizer is passed in rather than loaded here. Its parameter name
	    starts with an underscore so that ``st.cache_data`` does not try to hash
	    the pipeline object; only ``query`` forms the cache key.

	    Args:
	        _summarizer: Callable summarization pipeline; invoked with the
	            article text and expected to return
	            ``[{"summary_text": ...}]``.
	        query: Article title to look up (redirects are followed).

	    Returns:
	        A dict with keys ``"title"``, ``"summary"``, ``"image"`` (URL or
	        ``None``) and ``"url"``, or ``None`` when the API reports an error,
	        the page has no paragraph text, or any exception occurs.
	    """
	    # Imported locally so this function does not depend on edits to the
	    # file-level import block.
	    from urllib.parse import quote, urljoin

	    print(f"Fetching and processing article for '{query}'...")
	    params = {
	        "action": "parse",
	        "page": query,
	        "format": "json",
	        # Multiple props are separated by a bare pipe character.
	        "prop": "text|images",
	        "redirects": True,
	    }
	    try:
	        with httpx.Client() as client:
	            res = client.get("https://en.wikipedia.org/w/api.php", params=params)
	            res.raise_for_status()
	            data = res.json()

	        if "error" in data:
	            print(f"Wikipedia API error for query '{query}': {data['error']}")
	            return None

	        parse_data = data["parse"]
	        title = parse_data["title"]
	        html_content = parse_data["text"]["*"]
	        soup = BeautifulSoup(html_content, "html.parser")

	        # Prefer the infobox picture, otherwise fall back to the first image.
	        img_tag = soup.select_one(".infobox .image img") or soup.find("img")
	        image_url = None
	        if img_tag and img_tag.get("src"):
	            # urljoin copes with protocol-relative ("//host/..."),
	            # root-relative ("/static/...") and already-absolute sources,
	            # where blindly prefixing "https:" would produce a broken URL.
	            image_url = urljoin("https://en.wikipedia.org/", img_tag["src"])

	        paragraphs = [p.get_text() for p in soup.find_all("p")]
	        full_text = " ".join(text for text in paragraphs if text)

	        if not full_text.strip():
	            print(f"No text content found for '{query}'")
	            return None

	        # Cheap word-level pre-trim. 1024 words usually tokenize to more than
	        # BART's 1024-token input limit, so the tokenizer is also asked to
	        # truncate to the model's maximum length.
	        truncated_text = " ".join(full_text.split()[:1024])
	        print(f"Summarizing '{query}'...")
	        summary_result = _summarizer(
	            truncated_text,
	            max_length=150,
	            min_length=40,
	            do_sample=False,
	            truncation=True,
	        )
	        print("Summarization complete.")

	        # Percent-encode the title so characters such as parentheses do not
	        # break the markdown link the UI builds from this URL.
	        article_path = quote(title.replace(" ", "_"))
	        return {
	            "title": title,
	            "summary": summary_result[0]["summary_text"],
	            "image": image_url,
	            "url": f"https://en.wikipedia.org/wiki/{article_path}",
	        }
	    except Exception as e:
	        # Best-effort boundary: the UI shows a generic error when None is returned.
	        print(f"Error in get_wiki_summary_and_image for query '{query}': {e}")
	        return None

	# --- Streamlit User Interface ---
	# Page heading and tagline.
	st.title("🇮🇳 VirasaaT – AI-Powered Culture Explorer")
	st.markdown("Discover the cultural richness of India with dynamic recommendations and AI-generated summaries.")

	# Options for the state dropdown; the first entry is the "nothing chosen" placeholder.
	states_list = [
	    "Select a state",
	    "Andhra Pradesh",
	    "Arunachal Pradesh",
	    "Assam",
	    "Bihar",
	    "Chhattisgarh",
	    "Goa",
	    "Gujarat",
	    "Haryana",
	    "Himachal Pradesh",
	    "Jharkhand",
	    "Karnataka",
	    "Kerala",
	    "Madhya Pradesh",
	    "Maharashtra",
	    "Manipur",
	    "Meghalaya",
	    "Mizoram",
	    "Nagaland",
	    "Odisha",
	    "Punjab",
	    "Rajasthan",
	    "Sikkim",
	    "Tamil Nadu",
	    "Telangana",
	    "Tripura",
	    "Uttar Pradesh",
	    "Uttarakhand",
	    "West Bengal",
	]

	# Initialize session state for the search query
	if "query" not in st.session_state:
	    st.session_state.query = ""

	selected_state = st.selectbox("Step 1: Choose a State to Get Dynamic Recommendations", states_list)

	# The topic is recomputed from the widgets on every rerun. Previously the
	# session value was only ever overwritten, never cleared, so a topic chosen
	# earlier was still explored after the user emptied the text box or reset
	# the dropdowns.
	chosen_topic = ""
	if selected_state != "Select a state":
	    recommendations = get_recommendations(selected_state)
	    if recommendations:
	        display_recommendations = ["Select a topic"] + recommendations
	        selected_topic = st.selectbox("Step 2: Select a Recommended Topic", display_recommendations)
	        if selected_topic != "Select a topic":
	            chosen_topic = selected_topic
	    else:
	        st.warning(f"Could not find dynamic recommendations for {selected_state}. Please search manually below.")

	manual_query = st.text_input("Or Enter Any Cultural Topic Manually", placeholder="e.g., Diwali, Yoga, Taj Mahal")

	# A manually typed topic takes precedence over the dropdown choice; a
	# whitespace-only entry is treated as empty and falls back to the dropdown.
	st.session_state.query = manual_query.strip() or chosen_topic

	# Run the lookup only on the rerun triggered by clicking the button.
	if st.button("✨ Explore Culture", type="primary"):
	    query_to_explore = st.session_state.query

	    if query_to_explore.strip():
	        with st.spinner(f"AI is exploring '{query_to_explore}'..."):
	            # Hand the module-level summarizer to the cached fetch/summarize helper.
	            article_data = get_wiki_summary_and_image(summarizer, query_to_explore)

	        if not article_data:
	            st.error(f"Could not retrieve or summarize the article for '{query_to_explore}'. Please try another topic.")
	        else:
	            heading = article_data["title"]
	            st.subheader(heading)
	            if article_data["image"]:
	                st.image(article_data["image"], width=300, caption=heading)
	            st.markdown("### 📖 AI-Generated Summary")
	            st.write(article_data["summary"])
	            st.markdown(f"[🔗 Read Full Article on Wikipedia]({article_data['url']})")
	    else:
	        # Nothing selected and nothing typed.
	        st.warning("Please select a state and a topic, or enter a topic manually.")