# app.py (Full Corrected Code for Hugging Face Spaces)
import streamlit as st
import httpx
from bs4 import BeautifulSoup
import random
from transformers import pipeline
import os  # Import 'os' to read environment variables

# --- Page Configuration ---
st.set_page_config(page_title="VirasaaT", layout="centered")


# --- Caching & Model Loading ---
@st.cache_resource
def load_summarizer(token: str):
    """
    Load the Hugging Face summarization model using an API token.

    Cached with @st.cache_resource so the model is downloaded and
    instantiated only once per server process, not on every rerun.

    Args:
        token: Hugging Face API token (from the Space's secrets). If it
            is missing/empty, an error is shown and the app is stopped.

    Returns:
        A transformers summarization pipeline (facebook/bart-large-cnn).
    """
    print("Loading AI summarization model...")
    if not token:
        # This error is critical for deployment. It tells the user the secret is missing.
        st.error(
            "Hugging Face API token not found. Please set the 'HUGGINGFACE_TOKEN' secret in your Space settings.",
            icon="🔒"
        )
        st.stop()
    # Use 'token' instead of the deprecated 'use_auth_token'.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", token=token)
    print("Model loaded successfully.")
    return summarizer


# --- Secret and Model Initialization (Corrected for Hugging Face Spaces) ---
# 1. Read the secret from environment variables provided by Hugging Face Spaces.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# 2. Load the model once using the token.
#    This happens on app startup. The app will stop here if the token is not found.
summarizer = load_summarizer(HUGGINGFACE_TOKEN)


@st.cache_data(ttl=3600)  # Cache Wikipedia API data for 1 hour
def get_recommendations(state: str):
    """Fetch cultural topics from Wikipedia's category system.

    Queries the MediaWiki API for pages in "Category:Culture of <state>",
    drops "List of" index pages, and returns up to 15 titles in random
    order. Returns an empty list on any failure (best-effort feature).
    """
    print(f"Fetching recommendations for {state}...")
    category_title = f"Category:Culture of {state}"
    params = {
        "action": "query",
        "format": "json",
        "list": "categorymembers",
        "cmtitle": category_title,
        "cmlimit": 50,
        "cmtype": "page",
    }
    try:
        # Timeout so a hung Wikipedia request cannot block the Streamlit script.
        with httpx.Client(timeout=10.0) as client:
            res = client.get("https://en.wikipedia.org/w/api.php", params=params)
            res.raise_for_status()
            data = res.json()
        members = data.get("query", {}).get("categorymembers", [])
        if not members:
            return []
        titles = [member["title"] for member in members if "List of" not in member["title"]]
        random.shuffle(titles)
        return titles[:15]
    except Exception as e:
        # Recommendations are optional; log and degrade gracefully.
        print(f"Error fetching recommendations: {e}")
        return []


@st.cache_data(ttl=3600)
def get_wiki_summary_and_image(_summarizer, query: str):
    """
    Fetch, summarize, and extract an image for a Wikipedia article.

    This function ACCEPTS the summarizer object instead of loading it.
    The leading underscore on `_summarizer` tells st.cache_data not to
    hash the (unhashable) pipeline object when computing the cache key.

    Args:
        _summarizer: A transformers summarization pipeline.
        query: Article title to look up on English Wikipedia.

    Returns:
        A dict with 'title', 'summary', 'image' (URL or None) and 'url',
        or None if the article could not be fetched or summarized.
    """
    print(f"Fetching and processing article for '{query}'...")
    params = {"action": "parse", "page": query, "format": "json", "prop": "text|images", "redirects": True}
    try:
        # Timeout so a hung Wikipedia request cannot block the Streamlit script.
        with httpx.Client(timeout=10.0) as client:
            res = client.get("https://en.wikipedia.org/w/api.php", params=params)
            res.raise_for_status()
            data = res.json()

        if "error" in data:
            print(f"Wikipedia API error for query '{query}': {data['error']}")
            return None

        parse_data = data["parse"]
        title = parse_data["title"]
        html_content = parse_data["text"]["*"]
        soup = BeautifulSoup(html_content, "html.parser")

        # Find image more robustly: prefer the infobox image, fall back to the first <img>.
        img_tag = soup.select_one(".infobox .image img") or soup.find("img")
        image_url = f"https:{img_tag['src']}" if img_tag and 'src' in img_tag.attrs else None

        full_text = " ".join([p.get_text() for p in soup.find_all("p") if p.get_text()])
        if not full_text.strip():
            print(f"No text content found for '{query}'")
            return None

        # The summarizer is passed in, not loaded here. The word-level slice is a
        # coarse pre-cut (words != model tokens), so truncation=True additionally
        # lets the pipeline clip at BART's real token limit instead of erroring.
        truncated_text = " ".join(full_text.split()[:1024])  # BART's max token limit
        print(f"Summarizing '{query}'...")
        summary_result = _summarizer(truncated_text, max_length=150, min_length=40, do_sample=False, truncation=True)
        print("Summarization complete.")

        return {
            "title": title,
            "summary": summary_result[0]['summary_text'],
            "image": image_url,
            "url": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
        }
    except Exception as e:
        print(f"Error in get_wiki_summary_and_image for query '{query}': {e}")
        return None


# --- Streamlit User Interface ---
st.title("🇮🇳 VirasaaT – AI-Powered Culture Explorer")
st.markdown("Discover the cultural richness of India with dynamic recommendations and AI-generated summaries.")

states_list = ["Select a state", "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa",
               "Gujarat", "Haryana", "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh",
               "Maharashtra", "Manipur", "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan",
               "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"]

# Initialize session state for the search query
if "query" not in st.session_state:
    st.session_state.query = ""

selected_state = st.selectbox("Step 1: Choose a State to Get Dynamic Recommendations", states_list)

if selected_state != "Select a state":
    recommendations = get_recommendations(selected_state)
    if recommendations:
        display_recommendations = ["Select a topic"] + recommendations
        selected_topic = st.selectbox("Step 2: Select a Recommended Topic", display_recommendations)
        if selected_topic != "Select a topic":
            st.session_state.query = selected_topic
    else:
        st.warning(f"Could not find dynamic recommendations for {selected_state}. Please search manually below.")

manual_query = st.text_input("Or Enter Any Cultural Topic Manually", placeholder="e.g., Diwali, Yoga, Taj Mahal")
if manual_query:
    st.session_state.query = manual_query

if st.button("✨ Explore Culture", type="primary"):
    query_to_explore = st.session_state.query
    if not query_to_explore.strip():
        st.warning("Please select a state and a topic, or enter a topic manually.")
    else:
        with st.spinner(f"AI is exploring '{query_to_explore}'..."):
            # Pass the globally loaded summarizer object into the function.
            article_data = get_wiki_summary_and_image(summarizer, query_to_explore)

        if article_data:
            st.subheader(article_data["title"])
            if article_data["image"]:
                st.image(article_data["image"], width=300, caption=article_data["title"])
            st.markdown("### 📖 AI-Generated Summary")
            st.write(article_data["summary"])
            st.markdown(f"**[🔗 Read Full Article on Wikipedia]({article_data['url']})**")
        else:
            st.error(f"Could not retrieve or summarize the article for '{query_to_explore}'. Please try another topic.")