Spaces:
Running
Running
| from model import semantic_similarity | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
def lexical_similarity(text1, text2):
    """Return the TF-IDF cosine similarity of two texts, rounded to 4 places.

    Both texts are vectorized together with a fresh ``TfidfVectorizer`` and
    the cosine similarity between the two resulting rows is returned.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The similarity as a ``float`` rounded to four decimal places.
        ``0.0`` is returned when either text is ``None``, empty or
        whitespace-only, or when the vectorizer finds no usable tokens,
        because there is nothing to compare in those cases.
    """
    # A blank side shares no terms with anything, and two blank texts would
    # make fit_transform fail on an empty vocabulary, so short-circuit here.
    if not (text1 and text1.strip()) or not (text2 and text2.strip()):
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither
        # text yields a token, e.g. only single-character words with the
        # default token pattern. Treat that as "no lexical overlap".
        return 0.0

    score = cosine_similarity(vectors[0:1], vectors[1:2])[0][0]
    # Convert the NumPy scalar to a built-in float before rounding.
    return round(float(score), 4)
def compute_scores(text1, text2):
    """Score two texts semantically and lexically, then blend the results.

    Calls ``semantic_similarity`` and ``lexical_similarity`` on the pair and
    combines them into a weighted final score (75% semantic, 25% lexical).

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        A dict with the keys ``"semantic_score"``, ``"lexical_score"`` and
        ``"final_score"``, each rounded to four decimal places.
    """
    semantic = semantic_similarity(text1, text2)
    lexical = lexical_similarity(text1, text2)

    # The blend uses the unrounded component scores; rounding happens only
    # when the report is assembled below.
    blended = (0.75 * semantic) + (0.25 * lexical)

    report_fields = (
        ("semantic_score", semantic),
        ("lexical_score", lexical),
        ("final_score", blended),
    )
    return {label: round(value, 4) for label, value in report_fields}
def classify(score, threshold=0.75):
    """Label a similarity score as plagiarized or not.

    Args:
        score: Similarity score to classify.
        threshold: Cut-off at or above which the score counts as
            plagiarized. Defaults to 0.75.

    Returns:
        ``"Plagiarized"`` when ``score >= threshold``, otherwise
        ``"Not Plagiarized"``.
    """
    # The boundary is inclusive: a score equal to the threshold is flagged.
    if score >= threshold:
        return "Plagiarized"
    return "Not Plagiarized"