Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,13 +60,13 @@ def find_exact_matches(df1, df2, column_name):
|
|
| 60 |
return matches
|
| 61 |
|
| 62 |
|
| 63 |
-
def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.
|
| 64 |
# Find rows with similar texts in the specified column, excluding exact matches
|
| 65 |
similar_texts = []
|
| 66 |
exact_match_indices = set(exact_matches.index.tolist())
|
| 67 |
|
| 68 |
# Concatenate texts from both dataframes
|
| 69 |
-
all_texts = df1[column_name].tolist() + df2[column_name].tolist()
|
| 70 |
|
| 71 |
# Compute TF-IDF vectors
|
| 72 |
vectorizer = TfidfVectorizer()
|
|
|
|
| 60 |
return matches
|
| 61 |
|
| 62 |
|
| 63 |
+
def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.7):
|
| 64 |
# Find rows with similar texts in the specified column, excluding exact matches
|
| 65 |
similar_texts = []
|
| 66 |
exact_match_indices = set(exact_matches.index.tolist())
|
| 67 |
|
| 68 |
# Concatenate texts from both dataframes
|
| 69 |
+
all_texts = df1[column_name].astype(str).tolist() + df2[column_name].astype(str).tolist()
|
| 70 |
|
| 71 |
# Compute TF-IDF vectors
|
| 72 |
vectorizer = TfidfVectorizer()
|