Song committed on
Commit
4c971f7
·
1 Parent(s): b9a2b78
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +11 -0
  3. app.py +103 -0
  4. books.db +3 -0
  5. data.json +0 -0
  6. database.py +60 -0
  7. requirements.txt +6 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.db filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

WORKDIR /app

# Install dependencies before copying the rest of the sources so this layer
# stays cached until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code and data files into the image.
COPY . /app

# Port the Streamlit server is told to listen on in CMD below.
EXPOSE 7860

CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import sqlite3
4
+ import requests
5
+ import numpy as np # 新增:用於處理 NaN
6
+ from io import BytesIO
7
+ from PIL import Image
8
+ from database import recommend_books
9
+ import time # 新增:用於進度條
10
+
11
+ # 標題
12
+ st.title("圖書搜尋")
13
+
14
@st.cache_data
def load_books():
    """Load the whole ``books`` table from ``books.db`` into a DataFrame.

    The columns ``discount_price``, ``list_price`` and ``stock`` (when
    present) are converted to numbers; values that cannot be parsed, such as
    the ``'N/A'`` placeholder, become 0.

    Returns:
        pandas.DataFrame: every row and column of the ``books`` table.
    """
    conn = sqlite3.connect('books.db')
    try:
        df = pd.read_sql_query("SELECT * FROM books", conn)
    finally:
        # Release the connection even when the query raises (for example when
        # the table does not exist).
        conn.close()

    # Normalise the types of the numeric columns. errors='coerce' already
    # turns 'N/A' (and any other unparsable value) into NaN, so no separate
    # replace step is needed before filling with 0.
    for col in ('discount_price', 'list_price', 'stock'):
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
    return df
24
+
25
+ df = load_books()
26
+
27
def run_recommendation():
    """Recompute recommendations for the current search query.

    Registered as the ``on_change`` callback of the search text input. Reads
    ``st.session_state.search_query``, stores the result of
    ``recommend_books`` in ``st.session_state.recs``, clears
    ``st.session_state.selected_display`` and reports the outcome with a
    success or warning message.
    """
    state = st.session_state
    with st.spinner("正在計算推薦..."):
        time.sleep(0.5)
        results = recommend_books(state.search_query)
        state.recs = results
        # A new search invalidates whatever book was selected before.
        state.selected_display = None
        if not results.empty:
            st.success(f"找到 {len(results)} 本推薦書籍!")
        else:
            st.warning("無推薦結果,請調整關鍵字。")
36
+
37
# Seed the widget-backed query before the text input that uses it is created.
if "search_query" not in st.session_state:
    st.session_state["search_query"] = ""

# Editing the field triggers run_recommendation through on_change.
search_query = st.text_input(
    label="輸入關鍵字(如:靈修、耶穌)",
    value=st.session_state["search_query"],
    on_change=run_recommendation,
    key="search_query",
)

# Missing, None or empty recommendations are all reset to a fresh empty frame.
if (
    "recs" not in st.session_state
    or st.session_state["recs"] is None
    or st.session_state["recs"].empty
):
    st.session_state["recs"] = pd.DataFrame()

# No book is selected until the user clicks one of the result buttons.
if "selected_display" not in st.session_state:
    st.session_state["selected_display"] = None
52
+
53
import re
from urllib.parse import quote


def clean_text(val):
    """Return ``str(val)`` with HTML-like ``<...>`` tags removed."""
    # NOTE(review): the outer substitution replaces '~' with '~', which is a
    # no-op as written. It is kept unchanged in case the intended replacement
    # text was lost; confirm the original intent.
    return re.sub(r'~', '~', re.sub(r'<[^>]+>', '', str(val)))


# --- Recommendation list: one button per recommended book -----------------
if not st.session_state.recs.empty:
    st.subheader("推薦書籍列表(相似度排序)")
    for _, rec in st.session_state.recs.iterrows():
        if st.button(clean_text(rec['title']), key=f"book_{rec['product_id']}"):
            # Remember the selection by product_id rather than by display
            # title: two books with the same cleaned title would otherwise
            # resolve to whichever one comes first.
            st.session_state.selected_display = rec['product_id']

# --- Detail view for the selected book ------------------------------------
selected_id = st.session_state.selected_display
if selected_id is not None:
    matches = df[df['product_id'] == selected_id]
    if matches.empty:
        # Previously an unmatched id raised IndexError on .iloc[0].
        st.warning("找不到所選書籍的詳細資料。")
    else:
        book_data = matches.iloc[0]
        col1, col2 = st.columns([1, 2])

        with col1:
            image_url = book_data.get('image_url')
            # Only attempt a download for a non-empty string; None/NaN mean
            # the record has no cover image.
            if isinstance(image_url, str) and image_url.strip():
                try:
                    # NOTE(security): verify=False disables TLS certificate
                    # validation for the image download. The error message
                    # below shows this is deliberate; re-enable verification
                    # if the image host serves a valid certificate.
                    response = requests.get(image_url, verify=False, timeout=10)
                    response.raise_for_status()
                    img = Image.open(BytesIO(response.content))
                    img = img.resize((180, 250))
                    st.image(
                        img,
                        caption=f"{clean_text(book_data['title'])} - 作者:{clean_text(book_data['author'])}",
                        width=180,
                    )
                except Exception as e:
                    # Best effort: a broken cover must not take down the page.
                    st.error(f"圖片載入失敗:{e}(已跳過 SSL 驗證)")

        with col2:
            st.header("書籍詳情")
            # Percent-encode the id so a stray quote or angle bracket in the
            # data cannot break out of the href attribute rendered as HTML.
            product_id = quote(str(book_data.get('product_id', '')), safe='')
            st.markdown(f"""
**書名**:{clean_text(book_data.get('title', 'N/A'))}
**作者**:{clean_text(book_data.get('author', 'N/A'))}
**出版社**:{clean_text(book_data.get('publisher', 'N/A'))}
**優惠價**:{book_data.get('discount_price', 'N/A')}
**定價**:{book_data.get('list_price', 'N/A')}
**購買連結**:<a href="https://shop.campus.org.tw/ProductDetails.aspx?productID={product_id}" target="_blank" rel="noopener noreferrer">前往購買</a>
""", unsafe_allow_html=True)
            content_intro = clean_text(book_data.get('content_intro', 'N/A'))
            st.markdown("**內容簡介**:")
            # Show at most the first 500 characters of the introduction.
            st.write(content_intro[:500] + "..." if len(content_intro) > 500 else content_intro)
books.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edd29142b8ff8644cd700ce8fa7fbedc8eaa5441e417aa3d7fe282aa49504682
3
+ size 1269760
data.json ADDED
The diff for this file is too large to render. See raw diff
 
database.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import sqlite3
3
+ import pandas as pd
4
+ import re
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+
8
+ # 載入JSON數據
9
def load_data(json_file):
    """Read a UTF-8 encoded JSON file and return its contents as a DataFrame.

    Args:
        json_file: path of the JSON file to read.

    Returns:
        pandas.DataFrame: built directly from the parsed JSON value.
    """
    with open(json_file, mode='r', encoding='utf-8') as handle:
        records = json.loads(handle.read())
    frame = pd.DataFrame(records)
    return frame
13
+
14
+ # 初始化資料庫
15
def init_db(db_name='books.db', json_file='data.json'):
    """(Re)create the ``books`` table in a SQLite database from a JSON file.

    Args:
        db_name: path of the SQLite database file to write.
        json_file: path of the JSON file to import. Defaults to
            ``'data.json'``, the path that used to be hard-coded.

    Any existing ``books`` table is replaced.
    """
    # Load the data first: if the JSON file is missing or malformed we fail
    # before a database file is created or opened.
    df = load_data(json_file)

    conn = sqlite3.connect(db_name)
    try:
        df.to_sql('books', conn, if_exists='replace', index=False)
    finally:
        # Close the connection even when the write fails.
        conn.close()
    print("資料庫初始化完成!")
21
+
22
+ # 簡單推薦函數(基於內容簡介的TF-IDF,優化中文處理)
23
def recommend_books(query, top_n=5):
    """Return the books in ``books.db`` that best match a free-text query.

    Each book's title, author and content introduction are joined into one
    text, vectorised with TF-IDF over character n-grams and ranked by cosine
    similarity to the query. When TF-IDF yields no signal, the score falls
    back to the fraction of query terms found literally in the text.

    Args:
        query: search keywords entered by the user.
        top_n: maximum number of books to return.

    Returns:
        pandas.DataFrame: up to ``top_n`` rows with the columns
        ``product_id``, ``title``, ``author``, ``discount_price`` and
        ``similarity``, best match first. The frame is empty when the table
        is empty, a required column is missing, the query is blank or
        nothing matches.
    """
    conn = sqlite3.connect('books.db')
    try:
        df = pd.read_sql_query("SELECT * FROM books", conn)
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # Every column used below must exist; previously only content_intro was
    # checked and a missing title/author raised KeyError.
    required = ('product_id', 'title', 'author', 'discount_price', 'content_intro')
    if df.empty or any(col not in df.columns for col in required):
        return pd.DataFrame()

    # Strip punctuation, lowercase and collapse whitespace in the query.
    query = re.sub(r'[^\w\s]', ' ', str(query).lower())
    query = ' '.join(query.split())
    if not query:
        # A blank query would match every row in the keyword fallback.
        return pd.DataFrame()

    # Combine title, author and introduction to improve relevance.
    df['text'] = (
        df['title'].fillna('') + ' '
        + df['author'].fillna('') + ' '
        + df['content_intro'].fillna('')
    ).astype(str)

    # Character n-grams let unsegmented Chinese text match on 1-3 character
    # sequences. The default word analyzer only splits on whitespace and
    # punctuation, so a whole Chinese clause would become a single token.
    vectorizer = TfidfVectorizer(
        analyzer='char',
        max_features=2000,
        ngram_range=(1, 3),
        min_df=1,       # minimum document frequency
        max_df=0.95,    # drop n-grams that occur in almost every document
        lowercase=True,
    )
    try:
        tfidf_matrix = vectorizer.fit_transform(df['text'].values)
        query_vec = vectorizer.transform([query])
        df['similarity'] = cosine_similarity(query_vec, tfidf_matrix).flatten()
    except ValueError:
        # Raised for degenerate corpora (empty vocabulary, or max_df pruning
        # everything on a tiny table); rely on the keyword fallback instead.
        df['similarity'] = 0.0

    # Fallback when TF-IDF gives no signal: literal, case-insensitive keyword
    # matching, scored by the fraction of query terms found in the text.
    if df['similarity'].max() == 0:
        terms = query.split()
        hits = sum(
            df['text'].str.contains(term, case=False, na=False, regex=False).astype(int)
            for term in terms
        )
        df['similarity'] = hits / len(terms)

    # Keep only real matches so callers can detect "no results" via .empty,
    # then return the best top_n ordered by similarity.
    matched = df[df['similarity'] > 0]
    recommendations = matched.nlargest(top_n, 'similarity')[
        ['product_id', 'title', 'author', 'discount_price', 'similarity']
    ]
    return recommendations
58
+
59
+ if __name__ == "__main__":
60
+ init_db()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ requests
4
+ numpy
5
+ Pillow
6
+ scikit-learn