Spaces:

dryade36513
/

MooMooChecker

Sleeping

App Files Files Community

dryade36513 committed on Jan 10, 2025

Commit

cff7733

verified ·

1 Parent(s): bd8c43b

Upload 2 files

Browse files

Files changed (2) hide show

app.py +151 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,151 @@

+# app.py
+import streamlit as st
+import jieba
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import difflib
+import numpy as np
+import time
+# Page-level settings: browser-tab title and icon, full-width layout,
+# and a sidebar that starts out collapsed.
+_page_settings = {
+    "page_title": "哞哞文章相似度檢測",
+    "page_icon": "🐮",
+    "layout": "wide",
+    "initial_sidebar_state": "collapsed",
+}
+st.set_page_config(**_page_settings)
+# Custom CSS: enlarges the text-area font and defines the .big-font and
+# .result-font classes that the result panel applies to its two lines.
+_CUSTOM_CSS = """
+<style>
+    .stTextArea textarea {
+        font-size: 16px !important;
+    }
+    .big-font {
+        font-size: 24px !important;
+        font-weight: bold !important;
+        color: #FF4B4B !important;
+    }
+    .result-font {
+        font-size: 20px !important;
+        color: #1E88E5 !important;
+    }
+</style>
+"""
+st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
+# Page heading, centred and coloured through inline HTML.
+_TITLE_HTML = "<h1 style='text-align: center; color: #FF4B4B;'>🐮 哞哞文章相似度檢測</h1>"
+st.markdown(_TITLE_HTML, unsafe_allow_html=True)
+# Two side-by-side columns, one text box per article.
+col1, col2 = st.columns(2)
+with col1:
+    st.markdown("### 📝 文章1")
+    # The widget gets a real label (an empty one triggers Streamlit's
+    # empty-label warning and leaves the field unnamed for assistive
+    # technology); it is collapsed because the heading above already
+    # names the field visually.
+    text1 = st.text_area(
+        "文章1",
+        height=300,
+        placeholder="請在這裡輸入第一篇文章...",
+        key="text1",
+        label_visibility="collapsed",
+    )
+with col2:
+    st.markdown("### 📝 文章2")
+    text2 = st.text_area(
+        "文章2",
+        height=300,
+        placeholder="請在這裡輸入第二篇文章...",
+        key="text2",
+        label_visibility="collapsed",
+    )
+# Three equal-width columns so that the button sits in the middle one.
+col_btn1, col_btn2, col_btn3 = st.columns([1, 1, 1])
+with col_btn2:
+    start_btn = st.button("🚀 開始計算相似度", type="primary", use_container_width=True)
+def calculate_similarity(text1, text2):
+    """Score how similar two texts are and classify the score.
+
+    Three measures are blended with weights 0.4 / 0.3 / 0.3:
+    the Jaccard overlap of the jieba word sets, a difflib ratio over the
+    "。"-separated sentences, and the TF-IDF cosine similarity of the
+    jieba-segmented texts.
+
+    Args:
+        text1: First article as a string.
+        text2: Second article as a string.
+
+    Returns:
+        A ``(score, verdict)`` tuple, where ``score`` is a percentage
+        rounded to two decimals and ``verdict`` is the matching message.
+        ``(None, None)`` is returned when either text is blank.
+    """
+    if not text1.strip() or not text2.strip():
+        return None, None
+    # Segment once and reuse the tokens for both the word overlap and TF-IDF.
+    # Whitespace-only tokens are dropped so that shared spaces or newlines
+    # are not counted as shared vocabulary.
+    words1 = [w for w in jieba.cut(text1) if w.strip()]
+    words2 = [w for w in jieba.cut(text2) if w.strip()]
+    # 1. Word overlap (Jaccard index of the two vocabularies).
+    word_set1, word_set2 = set(words1), set(words2)
+    union = word_set1 | word_set2
+    word_similarity = len(word_set1 & word_set2) / len(union) if union else 0.0
+    # 2. Sentence similarity. Blank fragments are removed first: a text that
+    # ends in "。" would otherwise contribute an empty trailing "sentence"
+    # that matches the other text's empty tail and inflates the ratio.
+    sentences1 = [s.strip() for s in text1.split("。") if s.strip()]
+    sentences2 = [s.strip() for s in text2.split("。") if s.strip()]
+    sentence_similarity = difflib.SequenceMatcher(None, sentences1, sentences2).ratio()
+    # 3. TF-IDF cosine similarity. The vectorizer's default token pattern
+    # cannot segment Chinese, so it is fed the space-joined jieba tokens, and
+    # the pattern is relaxed to keep single-character words.
+    vectorizer = TfidfVectorizer(token_pattern=r"(?u)\b\w+\b")
+    try:
+        tfidf_matrix = vectorizer.fit_transform([" ".join(words1), " ".join(words2)])
+        cosine_sim = float(cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0])
+    except ValueError:
+        # Raised when no token survives the pattern (e.g. punctuation-only
+        # input leaves an empty vocabulary); treat that as "no similarity".
+        cosine_sim = 0.0
+    # Weighted total, expressed as a percentage.
+    weights = [0.4, 0.3, 0.3]
+    total_similarity = (word_similarity * weights[0] +
+                        sentence_similarity * weights[1] +
+                        cosine_sim * weights[2]) * 100
+    similarity_score = round(float(total_similarity), 2)
+    # Verdict bands: <=30, <=60, <=80, above 80.
+    if similarity_score <= 30:
+        result = "兩篇文章沒有關係"
+    elif similarity_score <= 60:
+        result = "兩篇文章似乎有那麼一點關係"
+    elif similarity_score <= 80:
+        result = "兩篇文章很類似"
+    else:
+        result = "兩篇文章有抄襲犯罪的味道"
+    return similarity_score, result
+# Run the analysis only when the button was clicked and both boxes hold
+# non-blank text. calculate_similarity rejects whitespace-only input, so
+# testing the stripped text here sends that case to the hint below instead
+# of leaving a finished progress bar on screen with no result.
+if start_btn and text1.strip() and text2.strip():
+    with st.spinner('🔍 分析中，請稍等...'):
+        # Progress bar animation (about one second) shown before the
+        # computation itself runs.
+        progress_text = "計算中..."
+        my_bar = st.progress(0, text=progress_text)
+        for percent_complete in range(100):
+            time.sleep(0.01)
+            my_bar.progress(percent_complete + 1, text=progress_text)
+        similarity_score, result = calculate_similarity(text1, text2)
+        # Remove the bar whether or not a score came back.
+        my_bar.empty()
+        if similarity_score is not None:
+            st.markdown("---")
+            st.markdown("<h3 style='text-align: center;'>✨ 分析結果</h3>", unsafe_allow_html=True)
+            result_text = f"""
+            <div style='text-align: center;'>
+                <p class='big-font'>相似度：{similarity_score}%</p>
+                <p class='result-font'>分析結果：{result}</p>
+            </div>
+            """
+            st.markdown(result_text, unsafe_allow_html=True)
+            # Pick the emoji for the first band whose upper bound covers the
+            # score; the bands mirror the verdict thresholds (30 / 60 / 80).
+            emoji_bands = ((30, "😌"), (60, "🤔"), (80, "😮"), (float("inf"), "😱"))
+            for upper_bound, emoji in emoji_bands:
+                if similarity_score <= upper_bound:
+                    st.markdown(f"<h1 style='text-align: center;'>{emoji}</h1>", unsafe_allow_html=True)
+                    break
+else:
+    st.info('👆 請在上方輸入兩篇要比較的文章，然後點擊"開始計算相似度"按鈕')
+# Footer: a divider followed by a small grey legend of the score bands.
+_LEGEND_HTML = """
+<div style='text-align: center;'>
+    <p style='color: gray; font-size: 14px;'>
+        💡 判定標準：<br>
+        0-30%：文章沒有關係 | 31-60%：稍有關係 | 61-80%：很類似 | 81-100%：疑似抄襲
+    </p>
+</div>
+"""
+st.markdown("---")
+st.markdown(_LEGEND_HTML, unsafe_allow_html=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+jieba
+scikit-learn
+numpy