File size: 7,081 Bytes
677cb35
 
c110548
 
 
 
 
677cb35
c110548
 
 
 
 
20006ce
c110548
677cb35
c110548
677cb35
 
c110548
 
 
677cb35
 
 
 
 
c110548
 
677cb35
20006ce
c110548
 
 
677cb35
 
 
 
 
 
 
20006ce
 
 
677cb35
c110548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20006ce
677cb35
 
 
 
20006ce
c110548
 
 
 
 
 
20006ce
677cb35
20006ce
 
 
c110548
 
 
 
20006ce
677cb35
 
c110548
20006ce
c110548
20006ce
677cb35
20006ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c110548
20006ce
c110548
 
 
 
 
677cb35
c110548
20006ce
 
c110548
 
20006ce
 
 
c110548
 
 
 
677cb35
c110548
 
 
 
20006ce
677cb35
20006ce
c110548
 
20006ce
 
c110548
20006ce
c110548
 
 
20006ce
677cb35
20006ce
 
c110548
 
 
 
 
 
 
 
677cb35
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# app.py (Full Corrected Code for Hugging Face Spaces)

import streamlit as st
import httpx
from bs4 import BeautifulSoup
import random
from transformers import pipeline
import os  # Import 'os' to read environment variables

# --- Page Configuration ---
# Sets the page title and requests the centered layout for the app.
st.set_page_config(
    page_title="VirasaaT",
    layout="centered",
)

# --- Caching & Model Loading ---

@st.cache_resource
def load_summarizer(token: str):
    """Load the Hugging Face summarization pipeline.

    The function is wrapped in ``st.cache_resource``, so the pipeline is
    built once per distinct ``token`` value and the same object is then
    reused across reruns and user sessions served by this process.

    Args:
        token: Hugging Face access token forwarded to ``pipeline``. A falsy
            value (``None`` or an empty string) is treated as a
            configuration error.

    Returns:
        The ``transformers`` summarization pipeline for
        ``facebook/bart-large-cnn``.

    Note:
        When ``token`` is falsy, an error box is rendered and ``st.stop()``
        halts the current script run, so nothing is returned in that case.
    """
    print("Loading AI summarization model...")
    if not token:
        # This error is critical for deployment: it tells the user the secret
        # is missing. The icon must be one real emoji character; a garbled
        # multi-character string is rejected by Streamlit and would replace
        # this message with an exception.
        st.error(
            "Hugging Face API token not found. Please set the 'HUGGINGFACE_TOKEN' secret in your Space settings.",
            icon="🔒"
        )
        st.stop()

    # Use 'token' instead of the deprecated 'use_auth_token'.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", token=token)
    print("Model loaded successfully.")
    return summarizer

# --- Secret and Model Initialization (Hugging Face Spaces) ---

# The access token is taken from the process environment; the lookup yields
# None when the HUGGINGFACE_TOKEN variable is not set.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Build the summarizer while the script starts up. load_summarizer shows an
# error and stops the run when the token is missing, so execution does not
# continue past this line in that case.
summarizer = load_summarizer(HUGGINGFACE_TOKEN)


@st.cache_data(ttl=3600)  # Cache Wikipedia API data for 1 hour
def get_recommendations(state: str):
    """Fetch cultural topic titles for a state from Wikipedia's category system.

    Queries the MediaWiki API for page members of ``Category:Culture of
    <state>`` (at most 50), drops titles containing ``"List of"``, shuffles
    the rest and keeps the first 15.

    Args:
        state: State name inserted verbatim into the category title.

    Returns:
        A list of up to 15 page titles in random order, or an empty list when
        the category has no page members or the request fails.

    Note:
        The result is cached by ``st.cache_data`` for one hour. That keeps the
        shuffled order stable across reruns, but it also means an empty list
        returned after a failed request is cached for the same period.
    """
    print(f"Fetching recommendations for {state}...")
    params = {
        "action": "query",
        "format": "json",
        "list": "categorymembers",
        "cmtitle": f"Category:Culture of {state}",
        "cmlimit": 50,
        "cmtype": "page",
    }
    # Wikimedia's User-Agent policy asks API clients to identify themselves;
    # generic library agents may be throttled or blocked.
    # TODO: append a contact URL or e-mail address as the policy recommends.
    headers = {"User-Agent": "VirasaaT/1.0 (Streamlit culture explorer app) python-httpx"}
    try:
        with httpx.Client(headers=headers) as client:
            res = client.get("https://en.wikipedia.org/w/api.php", params=params)
            res.raise_for_status()
            data = res.json()
        members = data.get("query", {}).get("categorymembers", [])
        if not members:
            return []
        titles = [member["title"] for member in members if "List of" not in member["title"]]
        random.shuffle(titles)
        return titles[:15]
    except Exception as e:
        # Deliberate best-effort: the UI falls back to manual search when no
        # recommendations are available, so failures are logged, not raised.
        print(f"Error fetching recommendations: {e}")
        return []

@st.cache_data(ttl=3600)
def get_wiki_summary_and_image(_summarizer, query: str):
    """Fetch a Wikipedia article, summarize it, and pick an image for it.

    The article HTML is retrieved through the MediaWiki ``parse`` API
    (following redirects), the text of its ``<p>`` elements is joined and
    summarized, and an image is taken from the infobox when there is one.

    Args:
        _summarizer: Callable summarization pipeline. The leading underscore
            tells ``st.cache_data`` not to hash this argument, so the cache
            key depends on ``query`` only.
        query: Page title to look up.

    Returns:
        A dict with keys ``"title"``, ``"summary"``, ``"image"`` (a URL or
        ``None``) and ``"url"``; or ``None`` when the API reports an error,
        the page has no paragraph text, or any step raises.

    Note:
        The result is cached for one hour, and a ``None`` result is cached
        like any other return value.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    from urllib.parse import quote, urljoin

    print(f"Fetching and processing article for '{query}'...")
    params = {"action": "parse", "page": query, "format": "json", "prop": "text|images", "redirects": True}
    # Wikimedia's User-Agent policy asks API clients to identify themselves;
    # generic library agents may be throttled or blocked.
    # TODO: append a contact URL or e-mail address as the policy recommends.
    headers = {"User-Agent": "VirasaaT/1.0 (Streamlit culture explorer app) python-httpx"}
    try:
        with httpx.Client(headers=headers) as client:
            res = client.get("https://en.wikipedia.org/w/api.php", params=params)
            res.raise_for_status()
            data = res.json()

        if "error" in data:
            print(f"Wikipedia API error for query '{query}': {data['error']}")
            return None

        parse_data = data["parse"]
        title = parse_data["title"]
        html_content = parse_data["text"]["*"]
        soup = BeautifulSoup(html_content, "html.parser")

        # Prefer an infobox image; otherwise fall back to the first image.
        img_tag = (
            soup.select_one(".infobox .image img")
            or soup.select_one(".infobox img")
            or soup.find("img")
        )
        src = img_tag.get("src") if img_tag else None
        # urljoin handles protocol-relative ("//host/..."), root-relative
        # ("/path") and already-absolute sources. Blindly prefixing "https:"
        # is only correct for the first form.
        image_url = urljoin("https://en.wikipedia.org/", src) if src else None

        paragraph_texts = (p.get_text() for p in soup.find_all("p"))
        full_text = " ".join(text for text in paragraph_texts if text)

        if not full_text.strip():
            print(f"No text content found for '{query}'")
            return None

        # The summarizer is passed in, not loaded here.
        # The word cap is only a cheap pre-trim: words are not tokens, so
        # 1024 words can still exceed the model's input limit.
        # truncation=True makes the tokenizer enforce the real limit.
        truncated_text = " ".join(full_text.split()[:1024])
        print(f"Summarizing '{query}'...")
        summary_result = _summarizer(
            truncated_text,
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        print("Summarization complete.")

        # Percent-encode the title so characters such as "?", "#", "%" or
        # parentheses cannot break the URL or the Markdown link built from it.
        page_path = quote(title.replace(" ", "_"), safe="/:")
        return {
            "title": title,
            "summary": summary_result[0]['summary_text'],
            "image": image_url,
            "url": f"https://en.wikipedia.org/wiki/{page_path}"
        }
    except Exception as e:
        # Deliberate best-effort: the caller shows a generic error message
        # when None is returned, so failures are logged rather than raised.
        print(f"Error in get_wiki_summary_and_image for query '{query}': {e}")
        return None

# --- Streamlit User Interface ---
# Flow: choose a state -> optionally choose a recommended topic -> optionally
# type a topic by hand -> press the button to fetch and summarize the article.
st.title("🇮🇳 VirasaaT – AI-Powered Culture Explorer")
st.markdown("Discover the cultural richness of India with dynamic recommendations and AI-generated summaries.")

states_list = ["Select a state", "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chhattisgarh", "Goa", "Gujarat", "Haryana", "Himachal Pradesh", "Jharkhand", "Karnataka", "Kerala", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya", "Mizoram", "Nagaland", "Odisha", "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh", "Uttarakhand", "West Bengal"]

# Initialize session state for the search query
if "query" not in st.session_state:
    st.session_state.query = ""

selected_state = st.selectbox("Step 1: Choose a State to Get Dynamic Recommendations", states_list)

# Topic chosen in step 2 during this run; stays empty when nothing is chosen.
selected_topic = ""

if selected_state != "Select a state":
    recommendations = get_recommendations(selected_state)
    if recommendations:
        display_recommendations = ["Select a topic"] + recommendations
        topic_choice = st.selectbox("Step 2: Select a Recommended Topic", display_recommendations)
        if topic_choice != "Select a topic":
            selected_topic = topic_choice
    else:
        st.warning(f"Could not find dynamic recommendations for {selected_state}. Please search manually below.")

manual_query = st.text_input("Or Enter Any Cultural Topic Manually", placeholder="e.g., Diwali, Yoga, Taj Mahal")

# Recompute the query from the widgets on every run instead of only ever
# overwriting it. Otherwise a topic picked earlier would still be explored
# after the user cleared the text box or switched to another state.
# A manually typed topic takes precedence over the recommended one.
st.session_state.query = manual_query.strip() or selected_topic

if st.button("✨ Explore Culture", type="primary"):
    query_to_explore = st.session_state.query

    if not query_to_explore.strip():
        st.warning("Please select a state and a topic, or enter a topic manually.")
    else:
        with st.spinner(f"AI is exploring '{query_to_explore}'..."):
            # Pass the globally loaded summarizer object into the function.
            article_data = get_wiki_summary_and_image(summarizer, query_to_explore)

        if article_data:
            st.subheader(article_data["title"])
            if article_data["image"]:
                st.image(article_data["image"], width=300, caption=article_data["title"])
            st.markdown("### 📖 AI-Generated Summary")
            st.write(article_data["summary"])
            st.markdown(f"**[🔗 Read Full Article on Wikipedia]({article_data['url']})**")
        else:
            st.error(f"Could not retrieve or summarize the article for '{query_to_explore}'. Please try another topic.")