Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -61,7 +61,7 @@ def find_exact_match(df1, df2, column_name):
|
|
| 61 |
matches = pd.merge(df1, df2, on=column_name, how='inner')
|
| 62 |
return matches
|
| 63 |
|
| 64 |
-
def find_similar_texts(df1, df2, column_name, threshold=0.3):
|
| 65 |
# Find rows with similar texts in the specified column, including exact matches
|
| 66 |
similar_texts = []
|
| 67 |
exact_matches = []
|
|
@@ -92,7 +92,7 @@ def find_similar_texts(df1, df2, column_name, threshold=0.3):
|
|
| 92 |
if similarity_score >= threshold:
|
| 93 |
if similarity == 1: # Exact match
|
| 94 |
exact_matches.append((i, j, row1[column_name], row2[column_name]))
|
| 95 |
-
if similarity >= threshold and similarity < 1:
|
| 96 |
similar_texts.append((i, j, row1[column_name], row2[column_name]))
|
| 97 |
|
| 98 |
return similar_texts, exact_matches
|
|
|
|
| 61 |
matches = pd.merge(df1, df2, on=column_name, how='inner')
|
| 62 |
return matches
|
| 63 |
|
| 64 |
+
def find_similar_texts(df1, df2, column_name, threshold=0.4):
|
| 65 |
# Find rows with similar texts in the specified column, including exact matches
|
| 66 |
similar_texts = []
|
| 67 |
exact_matches = []
|
|
|
|
| 92 |
if similarity_score >= threshold:
|
| 93 |
if similarity == 1: # Exact match
|
| 94 |
exact_matches.append((i, j, row1[column_name], row2[column_name]))
|
| 95 |
+
if similarity >= threshold and similarity < 1: #remove same
|
| 96 |
similar_texts.append((i, j, row1[column_name], row2[column_name]))
|
| 97 |
|
| 98 |
return similar_texts, exact_matches
|