|
|
|
|
|
|
|
|
import streamlit as st |
|
|
import httpx |
|
|
from bs4 import BeautifulSoup |
|
|
import random |
|
|
from transformers import pipeline |
|
|
import os |
|
|
|
|
|
|
|
|
# Configure the browser tab title and centered layout; per Streamlit's API
# this must be the first Streamlit command executed in the script.
st.set_page_config(page_title="VirasaaT", layout="centered")
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_summarizer(token: str):
    """
    Build the Hugging Face summarization pipeline.

    Wrapped in st.cache_resource so the model weights are loaded only
    once per session. Halts the Streamlit script with an error banner
    when no API token was supplied.

    Args:
        token: Hugging Face API token (may be None/empty when unset).

    Returns:
        A ready-to-call transformers summarization pipeline.
    """
    print("Loading AI summarization model...")
    if token:
        model = pipeline("summarization", model="facebook/bart-large-cnn", token=token)
        print("Model loaded successfully.")
        return model
    # Missing token: show a persistent error and abort this script run.
    # NOTE(review): the icon literal looks mojibake'd (presumably an emoji);
    # kept byte-for-byte — confirm the intended character.
    st.error(
        "Hugging Face API token not found. Please set the 'HUGGINGFACE_TOKEN' secret in your Space settings.",
        icon="๐"
    )
    st.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Read the Hugging Face token from the environment (typically configured as
# a secret in the hosting environment, e.g. a Hugging Face Space). May be
# None when unset — load_summarizer() handles that case.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")


# Eagerly load the (session-cached) summarization model at import time;
# load_summarizer() stops the script with an error banner if the token is
# missing, so code below can rely on `summarizer` being usable.
summarizer = load_summarizer(HUGGINGFACE_TOKEN)
|
|
|
|
|
|
|
|
@st.cache_data(ttl=3600)
def get_recommendations(state: str):
    """
    Fetch cultural topic titles for *state* from Wikipedia's category system.

    Queries the "Category:Culture of <state>" category members, drops
    "List of ..." pages, and returns up to 15 titles in random order.
    Cached for one hour; returns [] on any failure.
    """
    print(f"Fetching recommendations for {state}...")
    query_params = {
        "action": "query",
        "format": "json",
        "list": "categorymembers",
        "cmtitle": f"Category:Culture of {state}",
        "cmlimit": 50,
        "cmtype": "page",
    }
    try:
        with httpx.Client() as http:
            response = http.get("https://en.wikipedia.org/w/api.php", params=query_params)
            response.raise_for_status()
            payload = response.json()
        pages = payload.get("query", {}).get("categorymembers", [])
        # Skip index-style "List of ..." pages — they make poor topics.
        picks = [page["title"] for page in pages if "List of" not in page["title"]]
        if not picks:
            return []
        random.shuffle(picks)
        return picks[:15]
    except Exception as exc:
        # Best-effort: log and fall back to an empty recommendation list.
        print(f"Error fetching recommendations: {exc}")
        return []
|
|
|
|
|
@st.cache_data(ttl=3600)
def get_wiki_summary_and_image(_summarizer, query: str):
    """
    Fetch a Wikipedia article, summarize its prose, and pick an image.

    Args:
        _summarizer: Hugging Face summarization pipeline. The leading
            underscore tells st.cache_data not to hash this argument.
        query: Article title; MediaWiki redirects are followed.

    Returns:
        dict with keys "title", "summary", "image" (URL or None) and
        "url", or None when the article is missing, has no prose, or
        any step fails.
    """
    print(f"Fetching and processing article for '{query}'...")
    params = {"action": "parse", "page": query, "format": "json", "prop": "text|images", "redirects": True}
    try:
        with httpx.Client() as client:
            res = client.get("https://en.wikipedia.org/w/api.php", params=params)
            res.raise_for_status()
            data = res.json()

        # The API reports a missing/invalid page via an "error" payload,
        # not an HTTP error status.
        if "error" in data:
            print(f"Wikipedia API error for query '{query}': {data['error']}")
            return None

        parse_data = data["parse"]
        title = parse_data["title"]
        html_content = parse_data["text"]["*"]
        soup = BeautifulSoup(html_content, "html.parser")

        # Prefer the infobox image; fall back to the first image anywhere.
        # Wikipedia serves protocol-relative src URLs ("//upload...").
        img_tag = soup.select_one(".infobox .image img") or soup.find("img")
        image_url = f"https:{img_tag['src']}" if img_tag and 'src' in img_tag.attrs else None

        # Article prose lives in <p> tags; skip empty paragraphs.
        full_text = " ".join([p.get_text() for p in soup.find_all("p") if p.get_text()])

        if not full_text.strip():
            print(f"No text content found for '{query}'")
            return None

        # Rough cap on input size. BUG FIX: 1024 *words* can still exceed
        # BART's 1024-*token* limit, which previously made long articles
        # raise inside the pipeline and fail via the broad except below.
        # truncation=True makes the tokenizer clip the input instead.
        truncated_text = " ".join(full_text.split()[:1024])
        print(f"Summarizing '{query}'...")
        summary_result = _summarizer(truncated_text, max_length=150, min_length=40, do_sample=False, truncation=True)
        print("Summarization complete.")

        return {
            "title": title,
            "summary": summary_result[0]['summary_text'],
            "image": image_url,
            "url": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
        }
    except Exception as e:
        # Best-effort: log and let the caller render a friendly error.
        print(f"Error in get_wiki_summary_and_image for query '{query}': {e}")
        return None
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Page body.
# NOTE(review): the emoji literals below appear mojibake'd (e.g. "๐ฎ๐ณ");
# they are reproduced byte-for-byte — confirm intended characters upstream.
# ---------------------------------------------------------------------------
st.title("๐ฎ๐ณ VirasaaT โ AI-Powered Culture Explorer")
st.markdown("Discover the cultural richness of India with dynamic recommendations and AI-generated summaries.")

# Sentinel first entry keeps the selectbox "unselected" by default.
states_list = [
    "Select a state",
    "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh",
    "Goa", "Gujarat", "Haryana", "Himachal Pradesh", "Jharkhand", "Karnataka",
    "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
    "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim",
    "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand",
    "West Bengal",
]

# Persist the chosen topic across Streamlit reruns.
if "query" not in st.session_state:
    st.session_state["query"] = ""

chosen_state = st.selectbox("Step 1: Choose a State to Get Dynamic Recommendations", states_list)

if chosen_state != "Select a state":
    topics = get_recommendations(chosen_state)
    if not topics:
        st.warning(f"Could not find dynamic recommendations for {chosen_state}. Please search manually below.")
    else:
        topic_choice = st.selectbox("Step 2: Select a Recommended Topic", ["Select a topic"] + topics)
        if topic_choice != "Select a topic":
            st.session_state["query"] = topic_choice

typed_topic = st.text_input("Or Enter Any Cultural Topic Manually", placeholder="e.g., Diwali, Yoga, Taj Mahal")

# A manually typed topic wins over any recommendation picked above.
if typed_topic:
    st.session_state["query"] = typed_topic

if st.button("โจ Explore Culture", type="primary"):
    topic = st.session_state["query"]

    if not topic.strip():
        st.warning("Please select a state and a topic, or enter a topic manually.")
    else:
        with st.spinner(f"AI is exploring '{topic}'..."):
            article = get_wiki_summary_and_image(summarizer, topic)

        if article:
            st.subheader(article["title"])
            if article["image"]:
                st.image(article["image"], width=300, caption=article["title"])
            st.markdown("### ๐ AI-Generated Summary")
            st.write(article["summary"])
            st.markdown(f"**[๐ Read Full Article on Wikipedia]({article['url']})**")
        else:
            st.error(f"Could not retrieve or summarize the article for '{topic}'. Please try another topic.")