stardust-coder committed on
Commit
135b830
·
1 Parent(s): 41414a0
Files changed (1) hide show
  1. src/streamlit_app.py +40 -81
src/streamlit_app.py CHANGED
@@ -1,119 +1,78 @@
1
  import json
2
- import time
3
-
4
  import requests
5
  import streamlit as st
6
- from bs4 import BeautifulSoup
7
-
8
-
9
- # Google Scholar から論文リスト要素を取得
10
- def scrape_listings(soup):
11
- return soup.select("div.gs_r.gs_or.gs_scl")
12
-
13
-
14
- # タイトル取得
15
- def scrape_scholar_title(listing):
16
- title_element = listing.select_one("h3.gs_rt > a")
17
- if title_element:
18
- return title_element.text.strip()
19
- return "タイトルなし"
20
-
21
-
22
- # publication info 取得
23
- def scrape_scholar_publication_info(listing):
24
- publication_info_element = listing.select_one("div.gs_a")
25
- if publication_info_element:
26
- return publication_info_element.text.strip()
27
- return "出版情報なし"
28
 
29
 
30
- # スニペット取得
31
- def scrape_scholar_snippet(listing):
32
- snippet_element = listing.select_one("div.gs_rs, div.gs_snippet")
33
- if snippet_element:
34
- return snippet_element.text.strip()
35
- return "スニペットなし"
36
 
37
 
38
- # Google Scholar からデータ取得
39
- def fetch_google_scholar_data(query):
40
- url = "https://scholar.google.com/scholar"
41
  params = {
42
- "hl": "en",
43
- "q": query,
 
44
  }
45
 
46
- headers = {
47
- "User-Agent": (
48
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
49
- "AppleWebKit/537.36 (KHTML, like Gecko) "
50
- "Chrome/122.0.0.0 Safari/537.36"
51
- )
52
- }
53
-
54
- response = requests.get(url, params=params, headers=headers, timeout=15, verify=False)
55
  response.raise_for_status()
56
 
57
- time.sleep(2)
58
- soup = BeautifulSoup(response.text, "html.parser")
59
-
60
- listings = scrape_listings(soup)
61
 
62
- scholar_data = []
63
- for listing in listings:
64
- title = scrape_scholar_title(listing)
65
- publication_info = scrape_scholar_publication_info(listing)
66
- snippet = scrape_scholar_snippet(listing)
67
-
68
- scholar_info = {
69
- "title": title,
70
- "publication_info": publication_info,
71
- "snippet": snippet,
72
  }
73
- scholar_data.append(scholar_info)
74
 
75
- return scholar_data
76
 
77
 
78
  def main():
79
- st.set_page_config(page_title="Google Scholar Scraper", layout="wide")
80
- st.title("Google Scholar Scraper")
81
- st.write("Google Scholar 検索結果を取得て表示します。")
82
 
83
- query = st.text_input("検索キーワード", value="biology")
 
84
 
85
  if st.button("検索"):
86
- with st.spinner("Google Scholar からデータ取得中..."):
87
  try:
88
- scholar_data = fetch_google_scholar_data(query)
89
 
90
- if not scholar_data:
91
- st.warning("検索結果が取得できませんでした。")
92
  return
93
 
94
- st.success(f"{len(scholar_data)} 件の結果を取得しました。")
95
-
96
- for i, item in enumerate(scholar_data, start=1):
97
- with st.container():
98
- st.subheader(f"{i}. {item['title']}")
99
- st.write(f"**Publication Info:** {item['publication_info']}")
100
- st.write(f"**Snippet:** {item['snippet']}")
101
- st.divider()
102
 
103
- json_data = json.dumps(scholar_data, indent=4, ensure_ascii=False)
 
 
 
 
 
 
104
 
 
105
  st.download_button(
106
  label="JSONをダウンロード",
107
  data=json_data,
108
- file_name="google_scholar_data.json",
109
- mime="application/json",
110
  )
111
 
112
- st.json(scholar_data)
113
 
114
  except requests.exceptions.RequestException as e:
115
- st.error(f"リクエスト中にエラーが発生しました: {e}")
116
  except Exception as e:
117
  st.error(f"予期しないエラーが発生しました: {e}")
118
 
 
119
  main()
 
1
  import json
 
 
2
  import requests
3
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
# Semantic Scholar Graph API endpoint for keyword paper search.
# NOTE(review): unauthenticated requests are rate-limited — confirm an API key is not needed at expected load.
API_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
 
 
 
 
 
7
 
8
 
9
def search_papers(query, limit=10):
    """Search Semantic Scholar for papers matching *query*.

    Args:
        query: Free-text search keywords.
        limit: Maximum number of results to request from the API.

    Returns:
        A list of dicts, one per paper, with keys ``title``,
        ``publication_info`` (authors / venue / year), ``snippet``
        (abstract) and ``url``.

    Raises:
        requests.exceptions.RequestException: on network failures or
            HTTP error status (via ``raise_for_status``).
    """
    params = {
        "query": query,
        "limit": limit,
        "fields": "title,abstract,authors,year,venue,url",
    }
    response = requests.get(API_URL, params=params, timeout=20)
    response.raise_for_status()

    papers = []
    for item in response.json().get("data", []):
        # BUG FIX: the API returns explicit JSON null for missing fields,
        # so the key EXISTS with value None and dict.get(key, default)
        # returns None instead of the fallback. `or` covers both the
        # missing-key and null-value cases.
        authors = ", ".join(
            a.get("name") or "" for a in item.get("authors") or []
        )
        venue = item.get("venue") or "Unknown Venue"
        year = item.get("year") or "Unknown Year"
        papers.append({
            "title": item.get("title") or "タイトルなし",
            "publication_info": f"{authors} / {venue} / {year}",
            "snippet": item.get("abstract") or "概要なし",
            "url": item.get("url") or "",
        })
    return papers
33
 
34
 
35
def main():
    """Streamlit entry point: render the search form, show results, and
    offer a JSON download of the fetched papers."""
    st.set_page_config(page_title="Paper Search App", layout="wide")
    st.title("論文検索アプリ")
    st.write("Semantic Scholar API を使って論文を検索します。")

    query = st.text_input("検索キーワード", value="neuro")
    limit = st.slider("取得件数", min_value=1, max_value=20, value=10)

    # Guard clause: nothing to do until the search button is pressed.
    if not st.button("検索"):
        return

    with st.spinner("検索中..."):
        try:
            papers = search_papers(query, limit)

            if not papers:
                st.warning("結果が見つかりませんでした。")
                return

            st.success(f"{len(papers)} 件取得しました。")

            # One card per result: title, publication info, abstract, link.
            for rank, entry in enumerate(papers, start=1):
                st.subheader(f"{rank}. {entry['title']}")
                st.write(f"**Publication Info:** {entry['publication_info']}")
                st.write(f"**Snippet:** {entry['snippet']}")
                if entry["url"]:
                    st.markdown(f"[論文ページを開く]({entry['url']})")
                st.divider()

            st.download_button(
                label="JSONをダウンロード",
                data=json.dumps(papers, indent=2, ensure_ascii=False),
                file_name="papers.json",
                mime="application/json",
            )
            st.json(papers)
        except requests.exceptions.RequestException as e:
            st.error(f"APIリクエスト中にエラーが発生しました: {e}")
        except Exception as e:
            st.error(f"予期しないエラーが発生しました: {e}")
76
 
77
+
78
  main()