File size: 834 Bytes
b4151cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from model import semantic_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def lexical_similarity(text1, text2):
    """Return the TF-IDF cosine similarity of two texts, rounded to 4 places.

    Both texts are vectorized together with a fresh ``TfidfVectorizer`` and
    the cosine similarity between the two resulting rows is computed.

    Args:
        text1: First document, as a string.
        text2: Second document, as a string.

    Returns:
        The similarity as a plain ``float`` rounded to four decimal places.
        ``0.0`` is returned when either text is blank, or when the
        vectorizer cannot build any vocabulary from the two texts.
    """
    # A blank document shares no terms with anything, so the similarity is
    # zero by definition. Handling it here also avoids the ValueError
    # ("empty vocabulary") that fit_transform raises when both are blank.
    if not text1.strip() or not text2.strip():
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # Non-blank input can still yield no tokens at all (e.g. only
        # punctuation or single-character words with the default token
        # pattern); treat that as "no lexical overlap" instead of crashing.
        return 0.0

    score = cosine_similarity(vectors[0:1], vectors[1:2])[0][0]
    # Convert the NumPy scalar to a built-in float before rounding.
    return round(float(score), 4)


def compute_scores(text1, text2, semantic_weight=0.75):
    """Compute semantic, lexical and blended similarity scores for two texts.

    The semantic score comes from ``semantic_similarity`` and the lexical
    score from ``lexical_similarity``; the final score is their weighted sum.

    Args:
        text1: First text.
        text2: Second text.
        semantic_weight: Fraction of the final score contributed by the
            semantic score, in the range [0, 1]. The lexical score receives
            the remainder. Defaults to 0.75, i.e. a 0.75 / 0.25 split.

    Returns:
        A dict with the keys ``"semantic_score"``, ``"lexical_score"`` and
        ``"final_score"``, each rounded to four decimal places.

    Raises:
        ValueError: If ``semantic_weight`` is not within [0, 1].
    """
    # The chained comparison is also False for NaN, so NaN is rejected too.
    if not 0.0 <= semantic_weight <= 1.0:
        raise ValueError(
            f"semantic_weight must be between 0 and 1, got {semantic_weight!r}"
        )

    sem_score = semantic_similarity(text1, text2)
    lex_score = lexical_similarity(text1, text2)

    lexical_weight = 1.0 - semantic_weight
    final_score = (semantic_weight * sem_score) + (lexical_weight * lex_score)

    return {
        "semantic_score": round(sem_score, 4),
        "lexical_score": round(lex_score, 4),
        "final_score": round(final_score, 4),
    }


def classify(score, threshold=0.75):
    """Label a similarity score relative to a threshold.

    Returns "Plagiarized" when ``score`` is greater than or equal to
    ``threshold`` (0.75 by default), otherwise "Not Plagiarized".
    """
    if score >= threshold:
        return "Plagiarized"
    return "Not Plagiarized"