sathvikk committed on
Commit
b396118
·
verified ·
1 Parent(s): bd27fc9

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +51 -54
src/streamlit_app.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
- os.environ["HOME"] = "/tmp" # Fix for Hugging Face Spaces
3
 
4
  import streamlit as st
5
  import requests
@@ -10,7 +10,7 @@ st.set_page_config(page_title="WikiTrail", layout="wide")
10
  st.title("πŸ“š WikiTrail")
11
  st.markdown("Explore Wikipedia topics visually and get a summarized journey.")
12
 
13
- # Language options
14
  languages = {
15
  "English": "en",
16
  "Hindi (ΰ€Ήΰ€Ώΰ€¨ΰ₯ΰ€¦ΰ₯€)": "hi",
@@ -20,37 +20,35 @@ languages = {
20
  lang_name = st.selectbox("🌐 Select Language", list(languages.keys()))
21
  lang_code = languages[lang_name]
22
 
23
- # Input
24
- topic_input = st.text_input("πŸ” Enter a topic (in English)", placeholder="e.g., India, Telangana, Mahatma Gandhi")
25
  topic_input = topic_input.strip()
26
 
27
- # πŸ” Fix: Get best matching title or fallback
28
  def get_translated_title(query, lang):
29
- search_url = f"https://{lang}.wikipedia.org/w/api.php"
30
- params = {
31
- "action": "query",
32
- "list": "search",
33
- "srsearch": query,
34
- "format": "json",
35
- "origin": "*"
36
- }
37
-
38
  try:
 
 
 
 
 
 
 
 
39
  res = requests.get(search_url, params=params)
40
  res.raise_for_status()
41
  data = res.json()
42
- search_results = data.get("query", {}).get("search", [])
43
-
44
- if search_results:
45
- return search_results[0]["title"]
46
- return query # fallback
47
  except:
48
- return query # fallback
49
 
50
- # Summary API
51
  def fetch_summary(title, lang):
52
- safe_title = urllib.parse.quote(title.replace(" ", "_"))
53
- url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{safe_title}"
54
  res = requests.get(url)
55
  if res.status_code == 200:
56
  data = res.json()
@@ -61,58 +59,58 @@ def fetch_summary(title, lang):
61
  }
62
  return None
63
 
64
- # Related Topics
65
  def fetch_related(title, lang):
66
- safe_title = urllib.parse.quote(title.replace(" ", "_"))
67
- url = f"https://{lang}.wikipedia.org/w/api.php?action=query&format=json&origin=*&titles={safe_title}&prop=links&pllimit=5"
68
  res = requests.get(url)
69
  if res.status_code == 200:
70
  data = res.json()
71
- pages = list(data['query']['pages'].values())
72
- if pages and 'links' in pages[0]:
73
- return [link['title'] for link in pages[0]['links']]
74
  return []
75
 
76
- # Simple Summary
77
- def summarize_bullets(texts, limit=3):
78
- full_text = ' '.join(set(texts))
79
- sentences = full_text.replace('ΰ₯€', '.').replace('?', '.').replace('!', '.').split('.')
80
  clean = [s.strip() for s in sentences if s.strip()]
81
  return ["β€’ " + s + "." for s in clean[:limit]] if clean else ["No summary available."]
82
 
83
- # βœ… Main logic
84
  if topic_input:
85
  with st.spinner("πŸ” Searching Wikipedia..."):
 
86
  summaries = []
87
- final_text = ""
88
 
89
- translated_title = get_translated_title(topic_input, lang_code)
90
- st.caption(f"πŸ“„ Fetched title: {translated_title}") # Debug info
91
 
92
- main = fetch_summary(translated_title, lang_code)
93
  if not main:
94
  st.error(f"No matching page found in {lang_name} for '{topic_input}'")
95
  st.stop()
96
 
97
  st.subheader("πŸ”· Main Topic")
98
- summaries.append(main["summary"])
99
- final_text += f"πŸ“š {main['title']} - {lang_name} Wikipedia Summary\n\n"
100
- final_text += main["summary"] + "\n\n"
101
  st.markdown(f"### {main['title']}")
102
  st.write(main["summary"])
103
  st.markdown(f"[Read More β†’]({main['link']})", unsafe_allow_html=True)
104
 
 
 
 
105
  st.subheader("πŸ”— Related Topics")
106
- related_titles = fetch_related(translated_title, lang_code)
107
- if related_titles:
108
- for title in related_titles:
109
- data = fetch_summary(title, lang_code)
110
- if data and data["summary"] not in summaries:
111
- summaries.append(data["summary"])
112
- final_text += f"πŸ”— {data['title']}\n{data['summary']}\n\n"
113
- with st.expander(data["title"]):
114
- st.write(data["summary"])
115
- st.markdown(f"[Read More β†’]({data['link']})", unsafe_allow_html=True)
116
  else:
117
  st.info("No related topics found.")
118
 
@@ -120,10 +118,9 @@ if topic_input:
120
  for bullet in summarize_bullets(summaries):
121
  st.markdown(bullet)
122
 
123
- # πŸ“₯ Download
124
  st.download_button(
125
- label="πŸ“₯ Download Summary as TXT",
126
- data=final_text,
127
  file_name=f"{main['title']}_summary.txt",
128
  mime="text/plain"
129
  )
 
1
  import os
2
+ os.environ["HOME"] = "/tmp" # βœ… Fix streamlit write permission on Hugging Face
3
 
4
  import streamlit as st
5
  import requests
 
10
  st.title("πŸ“š WikiTrail")
11
  st.markdown("Explore Wikipedia topics visually and get a summarized journey.")
12
 
13
+ # 🌐 Language options
14
  languages = {
15
  "English": "en",
16
  "Hindi (ΰ€Ήΰ€Ώΰ€¨ΰ₯ΰ€¦ΰ₯€)": "hi",
 
20
  lang_name = st.selectbox("🌐 Select Language", list(languages.keys()))
21
  lang_code = languages[lang_name]
22
 
23
+ # πŸ” Topic input
24
+ topic_input = st.text_input("πŸ” Enter a topic (in English)", placeholder="e.g., India, Telangana, Gandhi")
25
  topic_input = topic_input.strip()
26
 
27
+ # πŸ”§ Get title (fallbacks if search fails)
28
def get_translated_title(query, lang):
    """Return the best-matching Wikipedia article title for *query*.

    Queries the MediaWiki search API on the ``lang`` language wiki and
    returns the title of the top search hit.  Falls back to *query*
    itself when there are no results or when the request fails.

    Args:
        query: Search text (typically English user input).
        lang: Wikipedia language code, e.g. ``"en"`` or ``"hi"``.

    Returns:
        The matched article title, or *query* unchanged on any failure.
    """
    search_url = f"https://{lang}.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "list": "search",
        "srsearch": query,
        "format": "json",
        "origin": "*",
    }
    try:
        # Timeout so a stalled Wikipedia API call cannot hang the app.
        res = requests.get(search_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()
        results = data.get("query", {}).get("search", [])
        if results:
            return results[0]["title"]
        return query  # no search hits -> fall back to the raw query
    except Exception:
        # Deliberate broad catch (was a bare `except:`, which also
        # swallowed SystemExit/KeyboardInterrupt): any network or JSON
        # failure degrades to the untranslated query instead of crashing.
        return query
47
 
48
+ # πŸ” Summary fetch
49
  def fetch_summary(title, lang):
50
+ title_encoded = urllib.parse.quote(title.replace(" ", "_"))
51
+ url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title_encoded}"
52
  res = requests.get(url)
53
  if res.status_code == 200:
54
  data = res.json()
 
59
  }
60
  return None
61
 
62
+ # πŸ”— Related links
63
def fetch_related(title, lang):
    """Return up to five titles of pages linked from *title*.

    Uses the MediaWiki ``prop=links`` API on the ``lang`` wiki.

    Args:
        title: Article title to look up links for.
        lang: Wikipedia language code, e.g. ``"en"``.

    Returns:
        A list of linked page titles (at most five), or ``[]`` when the
        request fails, returns a non-200 status, or the page has no links.
    """
    url = f"https://{lang}.wikipedia.org/w/api.php"
    # Let requests build/encode the query string instead of hand-quoting
    # the title into the URL; add a timeout so the UI cannot hang.
    params = {
        "action": "query",
        "format": "json",
        "origin": "*",
        "titles": title,
        "prop": "links",
        "pllimit": 5,
    }
    res = requests.get(url, params=params, timeout=10)
    if res.status_code == 200:
        data = res.json()
        # .get chain avoids a KeyError crash on unexpected API payloads
        # (the original indexed data['query']['pages'] directly).
        pages = list(data.get("query", {}).get("pages", {}).values())
        if pages and "links" in pages[0]:
            return [link["title"] for link in pages[0]["links"]]
    return []
73
 
74
# 🧠 Bullet summary
def summarize_bullets(summaries, limit=3):
    """Condense *summaries* into at most *limit* bullet-point sentences.

    Deduplicates the input texts (preserving first-seen order), joins
    them, splits the result into sentences on '.', '!', '?' and the
    Hindi danda, and returns the first *limit* sentences as bullets.

    Args:
        summaries: Iterable of summary strings.
        limit: Maximum number of bullets to return (default 3).

    Returns:
        List of bullet strings, or a one-element placeholder list when
        no sentences could be extracted.
    """
    # dict.fromkeys dedupes while keeping insertion order; the original
    # set() made bullet order nondeterministic across runs.
    full = ' '.join(dict.fromkeys(summaries))
    # Bug fix: the original replaced the Devanagari vowel sign ी
    # (U+0940) — mangling Hindi words — where the sentence-ending danda
    # । (U+0964) was clearly intended.
    sentences = full.replace("।", ".").replace("!", ".").replace("?", ".").split(".")
    clean = [s.strip() for s in sentences if s.strip()]
    return ["• " + s + "." for s in clean[:limit]] if clean else ["No summary available."]
80
 
81
+ # πŸ” Main logic
82
  if topic_input:
83
  with st.spinner("πŸ” Searching Wikipedia..."):
84
+ all_text = ""
85
  summaries = []
 
86
 
87
+ title = get_translated_title(topic_input, lang_code)
88
+ st.caption(f"πŸ“„ Fetched title: {title}")
89
 
90
+ main = fetch_summary(title, lang_code)
91
  if not main:
92
  st.error(f"No matching page found in {lang_name} for '{topic_input}'")
93
  st.stop()
94
 
95
  st.subheader("πŸ”· Main Topic")
 
 
 
96
  st.markdown(f"### {main['title']}")
97
  st.write(main["summary"])
98
  st.markdown(f"[Read More β†’]({main['link']})", unsafe_allow_html=True)
99
 
100
+ summaries.append(main["summary"])
101
+ all_text += f"{main['title']} ({lang_name})\n\n{main['summary']}\n\n"
102
+
103
  st.subheader("πŸ”— Related Topics")
104
+ related = fetch_related(title, lang_code)
105
+ if related:
106
+ for r in related:
107
+ sub = fetch_summary(r, lang_code)
108
+ if sub and sub["summary"] not in summaries:
109
+ summaries.append(sub["summary"])
110
+ all_text += f"{sub['title']}\n{sub['summary']}\n\n"
111
+ with st.expander(sub["title"]):
112
+ st.write(sub["summary"])
113
+ st.markdown(f"[Read More β†’]({sub['link']})", unsafe_allow_html=True)
114
  else:
115
  st.info("No related topics found.")
116
 
 
118
  for bullet in summarize_bullets(summaries):
119
  st.markdown(bullet)
120
 
 
121
  st.download_button(
122
+ label="πŸ“₯ Download Summary",
123
+ data=all_text,
124
  file_name=f"{main['title']}_summary.txt",
125
  mime="text/plain"
126
  )