# wikitrial — src/streamlit_app.py
# Author: sathvikk — "Update src/streamlit_app.py" (commit b396118, verified)
import os
# Must run BEFORE `import streamlit`: streamlit writes config/cache under
# $HOME at import time, and the default home dir is read-only on Hugging
# Face Spaces (presumably — see original deploy note; confirm on upgrade).
os.environ["HOME"] = "/tmp" # ✅ Fix streamlit write permission on Hugging Face
import streamlit as st
import requests
import urllib.parse

# Page chrome. set_page_config must be the first st.* call of the script.
st.set_page_config(page_title="WikiTrail", layout="wide")
st.title("📚 WikiTrail")
st.markdown("Explore Wikipedia topics visually and get a summarized journey.")

# 🌐 Language options: display label -> Wikipedia language subdomain code
languages = {
    "English": "en",
    "Hindi (हिन्दी)": "hi",
    "Telugu (తెలుగు)": "te",
    "Tamil (தமிழ்)": "ta"
}
lang_name = st.selectbox("🌐 Select Language", list(languages.keys()))
lang_code = languages[lang_name]

# 🔍 Topic input — trimmed; an empty string makes the main block below a no-op
topic_input = st.text_input("🔍 Enter a topic (in English)", placeholder="e.g., India, Telangana, Gandhi")
topic_input = topic_input.strip()
# 🔧 Resolve the user's query to an actual article title (falls back to the
# raw query if the search fails or returns nothing).
def get_translated_title(query, lang):
    """Return the best-matching Wikipedia article title for *query*.

    Parameters
    ----------
    query : str
        Search phrase entered by the user (typically English).
    lang : str
        Wikipedia language subdomain code, e.g. "en", "hi".

    Returns
    -------
    str
        Title of the top MediaWiki search hit, or *query* unchanged when
        the request errors, times out, or yields no results.
    """
    search_url = f"https://{lang}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "list": "search",
        "srsearch": query,
        "format": "json",
        "origin": "*",  # enables anonymous cross-origin API access
    }
    try:
        # timeout so a slow/unreachable API can never hang the UI forever
        res = requests.get(search_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()
    # Narrow catch: network/HTTP errors and bad JSON. A bare `except:` would
    # also swallow KeyboardInterrupt and real programming errors.
    except (requests.RequestException, ValueError):
        return query
    results = data.get("query", {}).get("search", [])
    return results[0]["title"] if results else query
# 🔍 Summary fetch via the Wikimedia REST v1 summary endpoint.
def fetch_summary(title, lang):
    """Return {"title", "summary", "link"} for *title*, or None on failure.

    Parameters
    ----------
    title : str
        Article title (spaces allowed; converted to underscores and quoted).
    lang : str
        Wikipedia language subdomain code.

    Returns
    -------
    dict | None
        None when the page does not exist (non-200), the request errors,
        or it times out — callers already treat None as "no page found".
        "link" may itself be None if the payload lacks a desktop URL.
    """
    title_encoded = urllib.parse.quote(title.replace(" ", "_"))
    url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title_encoded}"
    try:
        # timeout so a stalled request can't freeze the Streamlit spinner
        res = requests.get(url, timeout=10)
    except requests.RequestException:
        # Treat network failure the same as "page not found" — the caller
        # shows a friendly error rather than a traceback.
        return None
    if res.status_code != 200:
        return None
    data = res.json()
    return {
        "title": data.get("title", ""),
        "summary": data.get("extract", ""),
        "link": data.get("content_urls", {}).get("desktop", {}).get("page")
    }
# 🔗 Related links: first 5 pages linked from the article.
def fetch_related(title, lang):
    """Return up to 5 titles of pages linked from *title* (may be empty).

    Parameters
    ----------
    title : str
        Article title in the target-language wiki.
    lang : str
        Wikipedia language subdomain code.

    Returns
    -------
    list[str]
        Linked-page titles; empty on any network/HTTP error or when the
        API payload has no "links" section.
    """
    url = f"https://{lang}.wikipedia.org/w/api.php"
    # Let requests build and URL-encode the query string instead of
    # interpolating a pre-quoted title by hand.
    params = {
        "action": "query",
        "format": "json",
        "origin": "*",
        "titles": title,
        "prop": "links",
        "pllimit": 5,
    }
    try:
        res = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        return []
    if res.status_code != 200:
        return []
    data = res.json()
    # .get() chain guards against error payloads that lack "query"/"pages"
    # (the original data["query"]["pages"] raised KeyError on those).
    pages = list(data.get("query", {}).get("pages", {}).values())
    if pages and "links" in pages[0]:
        return [link["title"] for link in pages[0]["links"]]
    return []
# 🧠 Bullet summary: condense collected extracts into a few bullet points.
def summarize_bullets(summaries, limit=3):
    """Condense *summaries* into at most *limit* bullet-point sentences.

    Parameters
    ----------
    summaries : list[str]
        Summary paragraphs (may contain duplicates).
    limit : int
        Maximum number of bullets to return (default 3).

    Returns
    -------
    list[str]
        "• sentence." strings, or ["No summary available."] when the
        input contains no non-empty sentences.
    """
    # Deduplicate while PRESERVING input order. The original used set(),
    # whose iteration order is nondeterministic across runs, so the
    # combined summary could change on every rerun.
    unique = dict.fromkeys(summaries)
    full = ' '.join(unique)
    # Normalize the Devanagari danda (।), '!' and '?' to '.' so one split
    # handles sentence boundaries across all supported languages.
    sentences = full.replace("।", ".").replace("!", ".").replace("?", ".").split(".")
    clean = [s.strip() for s in sentences if s.strip()]
    return ["• " + s + "." for s in clean[:limit]] if clean else ["No summary available."]
# 🔍 Main logic — Streamlit re-executes this script top-to-bottom on every
# user interaction; the order of the st.* calls below IS the on-screen
# render order, so statements must not be reordered.
if topic_input:
    with st.spinner("🔍 Searching Wikipedia..."):
        all_text = ""  # plain-text transcript offered via the download button
        summaries = []  # unique extracts, later condensed into bullets
        # Resolve the (English) query to a title on the selected-language wiki.
        title = get_translated_title(topic_input, lang_code)
        st.caption(f"📄 Fetched title: {title}")
        main = fetch_summary(title, lang_code)
        if not main:
            st.error(f"No matching page found in {lang_name} for '{topic_input}'")
            st.stop()  # aborts this script run; nothing below renders
        st.subheader("🔷 Main Topic")
        st.markdown(f"### {main['title']}")
        st.write(main["summary"])
        st.markdown(f"[Read More →]({main['link']})", unsafe_allow_html=True)
        summaries.append(main["summary"])
        all_text += f"{main['title']} ({lang_name})\n\n{main['summary']}\n\n"
        st.subheader("🔗 Related Topics")
        related = fetch_related(title, lang_code)
        if related:
            for r in related:
                sub = fetch_summary(r, lang_code)
                # Skip pages that failed to load or duplicate an earlier extract.
                if sub and sub["summary"] not in summaries:
                    summaries.append(sub["summary"])
                    all_text += f"{sub['title']}\n{sub['summary']}\n\n"
                    with st.expander(sub["title"]):
                        st.write(sub["summary"])
                        st.markdown(f"[Read More →]({sub['link']})", unsafe_allow_html=True)
        else:
            st.info("No related topics found.")
        st.subheader("🧠 Combined Summary")
        for bullet in summarize_bullets(summaries):
            st.markdown(bullet)
        # Offer everything gathered this run as a single downloadable text file.
        st.download_button(
            label="📥 Download Summary",
            data=all_text,
            file_name=f"{main['title']}_summary.txt",
            mime="text/plain"
        )