|
|
import os

# Point HOME at a writable directory *before* importing Streamlit:
# Streamlit writes config/state under ~/.streamlit, which fails when the
# real home directory is read-only (e.g. some container deployments).
# NOTE(review): presumably a deployment workaround — confirm it is still
# required for the target environment.
os.environ["HOME"] = "/tmp"

import streamlit as st

import requests

import urllib.parse
|
|
|
|
|
# Page chrome: browser-tab title, wide layout, on-page header and tagline.
st.set_page_config(page_title="WikiTrail", layout="wide")

st.title("📚 WikiTrail")

st.markdown("Explore Wikipedia topics visually and get a summarized journey.")
|
|
|
|
|
|
|
|
# Supported Wikipedia editions: display label -> language subdomain code
# (used to build URLs such as https://<code>.wikipedia.org/...).
languages = {
    "English": "en",
    "Hindi (हिन्दी)": "hi",
    "Telugu (తెలుగు)": "te",
    "Tamil (தமிழ்)": "ta"
}

lang_name = st.selectbox("🌐 Select Language", list(languages.keys()))

lang_code = languages[lang_name]
|
|
|
|
|
|
|
|
# Free-text topic query (entered in English); surrounding whitespace is
# irrelevant to the search, so it is stripped immediately.
topic_input = st.text_input(
    "🔍 Enter a topic (in English)",
    placeholder="e.g., India, Telangana, Gandhi",
).strip()
|
|
|
|
|
|
|
|
def get_translated_title(query: str, lang: str) -> str:
    """Search the *lang* Wikipedia for *query* and return the best title.

    Uses the MediaWiki search API so an English query can be resolved to
    the corresponding article title on a non-English wiki.

    Args:
        query: Search text (typically English).
        lang: Wikipedia language subdomain code, e.g. ``"en"``, ``"hi"``.

    Returns:
        The top search result's title, or *query* unchanged when the
        request fails, times out, or yields no results.
    """
    search_url = f"https://{lang}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "list": "search",
        "srsearch": query,
        "format": "json",
        "origin": "*",
    }
    try:
        # timeout prevents the Streamlit script from hanging forever on a
        # stalled connection (the original request had no timeout).
        res = requests.get(search_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()
        results = data.get("query", {}).get("search", [])
        if results:
            return results[0]["title"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit). Any network, HTTP, JSON, or
        # response-shape failure falls back to the raw query.
        pass
    return query
|
|
|
|
|
|
|
|
def fetch_summary(title: str, lang: str):
    """Fetch the REST summary card for a Wikipedia page.

    Args:
        title: Page title (spaces allowed; converted to underscores).
        lang: Wikipedia language subdomain code.

    Returns:
        A dict with ``title``, ``summary`` (the extract), and ``link``
        (desktop page URL, may be None), or ``None`` when the page is
        missing or the request fails.
    """
    title_encoded = urllib.parse.quote(title.replace(" ", "_"))
    url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title_encoded}"
    try:
        # timeout + exception handling: the original call could hang
        # indefinitely or crash the app on a connection error, unlike its
        # sibling get_translated_title which degrades gracefully.
        res = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if res.status_code != 200:
        return None
    try:
        data = res.json()
    except ValueError:
        return None
    return {
        "title": data.get("title", ""),
        "summary": data.get("extract", ""),
        "link": data.get("content_urls", {}).get("desktop", {}).get("page"),
    }
|
|
|
|
|
|
|
|
def fetch_related(title: str, lang: str, limit: int = 5):
    """Return titles of pages linked from *title*'s article.

    Args:
        title: Source page title.
        lang: Wikipedia language subdomain code.
        limit: Maximum number of links to request (was hard-coded to 5;
            now a backward-compatible parameter).

    Returns:
        A list of linked page titles, or ``[]`` on any failure.
    """
    url = f"https://{lang}.wikipedia.org/w/api.php"
    # Let requests handle URL encoding instead of hand-building the query
    # string (the original interpolated a pre-quoted title into the URL).
    params = {
        "action": "query",
        "format": "json",
        "origin": "*",
        "titles": title,
        "prop": "links",
        "pllimit": limit,
    }
    try:
        res = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        return []
    if res.status_code != 200:
        return []
    try:
        # .get() chain instead of data["query"]["pages"]: a malformed
        # response no longer raises KeyError.
        pages = list(res.json().get("query", {}).get("pages", {}).values())
    except ValueError:
        return []
    if pages and "links" in pages[0]:
        return [link["title"] for link in pages[0]["links"]]
    return []
|
|
|
|
|
|
|
|
def summarize_bullets(summaries, limit: int = 3):
    """Condense *summaries* into at most *limit* bullet-point sentences.

    Args:
        summaries: Iterable of summary strings (may contain duplicates).
        limit: Maximum number of bullets to return.

    Returns:
        A list of "• <sentence>." strings, or ``["No summary available."]``
        when no non-empty sentence can be extracted.
    """
    # Ordered de-duplication. The original used set(), whose iteration
    # order is nondeterministic, so the bullets could change between runs
    # on identical input; dict.fromkeys preserves first-seen order.
    full = ' '.join(dict.fromkeys(summaries))
    # Normalize the Devanagari danda ("।") and other sentence terminators
    # to '.' so one split covers all supported languages.
    sentences = full.replace("।", ".").replace("!", ".").replace("?", ".").split(".")
    clean = [s.strip() for s in sentences if s.strip()]
    if not clean:
        return ["No summary available."]
    return ["• " + s + "." for s in clean[:limit]]
|
|
|
|
|
|
|
|
# Main flow: resolve the query to a title, render the main article, its
# related topics, a bullet digest, and a downloadable text version.
if topic_input:
    with st.spinner("🔍 Searching Wikipedia..."):
        # Summaries feed the bullet digest; text_parts feed the download.
        collected = []
        text_parts = []

        # Resolve the (English) query to a title on the selected wiki.
        title = get_translated_title(topic_input, lang_code)
        st.caption(f"📄 Fetched title: {title}")

        main = fetch_summary(title, lang_code)
        if not main:
            st.error(f"No matching page found in {lang_name} for '{topic_input}'")
            st.stop()

        # --- Main topic card ---
        st.subheader("🔷 Main Topic")
        st.markdown(f"### {main['title']}")
        st.write(main["summary"])
        st.markdown(f"[Read More →]({main['link']})", unsafe_allow_html=True)

        collected.append(main["summary"])
        text_parts.append(f"{main['title']} ({lang_name})\n\n{main['summary']}\n\n")

        # --- Related topics, one expander each ---
        st.subheader("🔗 Related Topics")
        related = fetch_related(title, lang_code)
        if not related:
            st.info("No related topics found.")
        else:
            for related_title in related:
                detail = fetch_summary(related_title, lang_code)
                # Skip fetch failures and pages whose summary was
                # already shown.
                if not detail or detail["summary"] in collected:
                    continue
                collected.append(detail["summary"])
                text_parts.append(f"{detail['title']}\n{detail['summary']}\n\n")
                with st.expander(detail["title"]):
                    st.write(detail["summary"])
                    st.markdown(f"[Read More →]({detail['link']})", unsafe_allow_html=True)

        # --- Bullet digest of everything shown above ---
        st.subheader("🧠 Combined Summary")
        for bullet in summarize_bullets(collected):
            st.markdown(bullet)

        # Offer the accumulated plain-text trail as a download.
        st.download_button(
            label="📥 Download Summary",
            data="".join(text_parts),
            file_name=f"{main['title']}_summary.txt",
            mime="text/plain"
        )
|
|
|