|
|
import os

# Point HOME at a writable directory *before* importing Streamlit:
# Streamlit writes config/state under ~/.streamlit, which fails when the
# real home directory is read-only (e.g. some container deployments).
# NOTE(review): presumably a deployment workaround — confirm it is still
# required for the target environment.
os.environ["HOME"] = "/tmp"

import streamlit as st

import requests

import urllib.parse
|
|
|
|
|
# Page chrome: browser-tab title, wide layout, on-page header and tagline.
st.set_page_config(page_title="WikiTrail", layout="wide")

st.title("📚 WikiTrail")

st.markdown("Explore Wikipedia topics visually and get a summarized journey.")
|
|
|
|
|
|
|
|
# Supported Wikipedia editions: display label -> language subdomain code
# (used to build URLs such as https://<code>.wikipedia.org/...).
languages = {
    "English": "en",
    "Hindi (हिन्दी)": "hi",
    "Telugu (తెలుగు)": "te",
    "Tamil (தமிழ்)": "ta"
}

lang_name = st.selectbox("🌐 Select Language", list(languages.keys()))

lang_code = languages[lang_name]
|
|
|
|
|
|
|
|
# Free-text topic query (entered in English); surrounding whitespace is
# irrelevant to the search, so it is stripped immediately.
topic_input = st.text_input(
    "🔍 Enter a topic (in English)",
    placeholder="e.g., India, Telangana, Gandhi",
).strip()
|
|
|
|
|
|
|
|
def get_translated_title(query: str, lang: str) -> str:
    """Search the *lang* Wikipedia for *query* and return the best title.

    Uses the MediaWiki search API so an English query can be resolved to
    the corresponding article title on a non-English wiki.

    Args:
        query: Search text (typically English).
        lang: Wikipedia language subdomain code, e.g. ``"en"``, ``"hi"``.

    Returns:
        The top search result's title, or *query* unchanged when the
        request fails, times out, or yields no results.
    """
    search_url = f"https://{lang}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "list": "search",
        "srsearch": query,
        "format": "json",
        "origin": "*",
    }
    try:
        # timeout prevents the Streamlit script from hanging forever on a
        # stalled connection (the original request had no timeout).
        res = requests.get(search_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()
        results = data.get("query", {}).get("search", [])
        if results:
            return results[0]["title"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit). Any network, HTTP, JSON, or
        # response-shape failure falls back to the raw query.
        pass
    return query
|
|
|
|
|
|
|
|
def fetch_summary(title: str, lang: str):
    """Fetch the REST summary card for a Wikipedia page.

    Args:
        title: Page title (spaces allowed; converted to underscores).
        lang: Wikipedia language subdomain code.

    Returns:
        A dict with ``title``, ``summary`` (the extract), and ``link``
        (desktop page URL, may be None), or ``None`` when the page is
        missing or the request fails.
    """
    title_encoded = urllib.parse.quote(title.replace(" ", "_"))
    url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title_encoded}"
    try:
        # timeout + exception handling: the original call could hang
        # indefinitely or crash the app on a connection error, unlike its
        # sibling get_translated_title which degrades gracefully.
        res = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if res.status_code != 200:
        return None
    try:
        data = res.json()
    except ValueError:
        return None
    return {
        "title": data.get("title", ""),
        "summary": data.get("extract", ""),
        "link": data.get("content_urls", {}).get("desktop", {}).get("page"),
    }
|
|
|
|
|
|
|
|
def fetch_related(title: str, lang: str, limit: int = 5):
    """Return titles of pages linked from *title*'s article.

    Args:
        title: Source page title.
        lang: Wikipedia language subdomain code.
        limit: Maximum number of links to request (was hard-coded to 5;
            now a backward-compatible parameter).

    Returns:
        A list of linked page titles, or ``[]`` on any failure.
    """
    url = f"https://{lang}.wikipedia.org/w/api.php"
    # Let requests handle URL encoding instead of hand-building the query
    # string (the original interpolated a pre-quoted title into the URL).
    params = {
        "action": "query",
        "format": "json",
        "origin": "*",
        "titles": title,
        "prop": "links",
        "pllimit": limit,
    }
    try:
        res = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        return []
    if res.status_code != 200:
        return []
    try:
        # .get() chain instead of data["query"]["pages"]: a malformed
        # response no longer raises KeyError.
        pages = list(res.json().get("query", {}).get("pages", {}).values())
    except ValueError:
        return []
    if pages and "links" in pages[0]:
        return [link["title"] for link in pages[0]["links"]]
    return []
|
|
|
|
|
|
|
|
def summarize_bullets(summaries, limit: int = 3):
    """Condense *summaries* into at most *limit* bullet-point sentences.

    Args:
        summaries: Iterable of summary strings (may contain duplicates).
        limit: Maximum number of bullets to return.

    Returns:
        A list of "• <sentence>." strings, or ``["No summary available."]``
        when no non-empty sentence can be extracted.
    """
    # Ordered de-duplication. The original used set(), whose iteration
    # order is nondeterministic, so the bullets could change between runs
    # on identical input; dict.fromkeys preserves first-seen order.
    full = ' '.join(dict.fromkeys(summaries))
    # Normalize the Devanagari danda ("।") and other sentence terminators
    # to '.' so one split covers all supported languages.
    sentences = full.replace("।", ".").replace("!", ".").replace("?", ".").split(".")
    clean = [s.strip() for s in sentences if s.strip()]
    if not clean:
        return ["No summary available."]
    return ["• " + s + "." for s in clean[:limit]]
|
|
|
|
|
|
|
|
# Main flow: resolve the query to a title, render the main article, its
# related topics, a bullet digest, and a downloadable text version.
if topic_input:
    with st.spinner("🔍 Searching Wikipedia..."):
        # Summaries feed the bullet digest; text_parts feed the download.
        collected = []
        text_parts = []

        # Resolve the (English) query to a title on the selected wiki.
        title = get_translated_title(topic_input, lang_code)
        st.caption(f"📄 Fetched title: {title}")

        main = fetch_summary(title, lang_code)
        if not main:
            st.error(f"No matching page found in {lang_name} for '{topic_input}'")
            st.stop()

        # --- Main topic card ---
        st.subheader("🔷 Main Topic")
        st.markdown(f"### {main['title']}")
        st.write(main["summary"])
        st.markdown(f"[Read More →]({main['link']})", unsafe_allow_html=True)

        collected.append(main["summary"])
        text_parts.append(f"{main['title']} ({lang_name})\n\n{main['summary']}\n\n")

        # --- Related topics, one expander each ---
        st.subheader("🔗 Related Topics")
        related = fetch_related(title, lang_code)
        if not related:
            st.info("No related topics found.")
        else:
            for related_title in related:
                detail = fetch_summary(related_title, lang_code)
                # Skip fetch failures and pages whose summary was
                # already shown.
                if not detail or detail["summary"] in collected:
                    continue
                collected.append(detail["summary"])
                text_parts.append(f"{detail['title']}\n{detail['summary']}\n\n")
                with st.expander(detail["title"]):
                    st.write(detail["summary"])
                    st.markdown(f"[Read More →]({detail['link']})", unsafe_allow_html=True)

        # --- Bullet digest of everything shown above ---
        st.subheader("🧠 Combined Summary")
        for bullet in summarize_bullets(collected):
            st.markdown(bullet)

        # Offer the accumulated plain-text trail as a download.
        st.download_button(
            label="📥 Download Summary",
            data="".join(text_parts),
            file_name=f"{main['title']}_summary.txt",
            mime="text/plain"
        )
|
|
|