Spaces:

Mattral
/

Excel-Match-Analysis

Sleeping

App Files Files Community

Mattral committed on May 2, 2024

Commit

db70d75

verified ·

1 Parent(s): 11e747b

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -1

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from Levenshtein import distance as levenshtein_distance
 import matplotlib.pyplot as plt
 import seaborn as sns
 ms = st.session_state
 if "themes" not in ms:
   ms.themes = {"current_theme": "light",
@@ -56,13 +57,20 @@ def read_csv_or_excel(file):
         return pd.read_excel(file)
     else:
         raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")
 def find_exact_match(df1, df2, column_name):
     """Return the rows of df1 and df2 that share a value in column_name.

     The two frames are inner-joined on ``column_name``, so only key values
     present in both inputs appear in the result.
     """
     # An inner join drops every row whose key is missing from the other frame.
     return pd.merge(left=df1, right=df2, how='inner', on=column_name)
 def find_similar_texts(df1, df2, column_name, threshold=0.3):
     # Find rows with similar texts in the specified column, excluding exact matches
     similar_texts = []
@@ -108,6 +116,7 @@ def plot_correlation(df, column):
     return plt.gcf()  # Return the matplotlib figure
 st.set_option('deprecation.showPyplotGlobalUse', False)
 def plot_correlation_matrix(df):
     # Filter for numeric columns, if the DataFrame has non-numeric columns
     numeric_df = df.select_dtypes(include=['number'])
@@ -160,6 +169,8 @@ def main():
            # Display exact matches
             st.header("Exact Matches Compare")
             for match in exact_matches:
                 st.write(f"Row {match[0]} in warehouse item stocks is exactly the same as Row {match[1]} in industry item stocks:")
                 st.write(f"Warehouse: {match[2]}")
                 st.write(f"Industry: {match[3]}")
@@ -169,6 +180,9 @@ def main():
             # Display similar texts
             st.header("Similar (but Not Same) Texts")
             for text_pair in similar_texts:
                 st.write(f"Row {text_pair[0]} in warehouse item stocks is similar to Row {text_pair[1]} in industry item stocks:")
                 st.write(f"Warehouse: {text_pair[2]}")
                 st.write(f"Industry: {text_pair[3]}")
@@ -202,4 +216,4 @@ def main():
             plot_correlation_matrix(industry_df)
 if __name__ == "__main__":
-    main()

 import matplotlib.pyplot as plt
 import seaborn as sns
 ms = st.session_state
 if "themes" not in ms:
   ms.themes = {"current_theme": "light",
         return pd.read_excel(file)
     else:
         raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")
 def find_exact_match(df1, df2, column_name):
     """Return the rows of df1 and df2 whose column_name values match exactly.

     The key column of each frame is converted to text and stripped of
     surrounding whitespace before merging, so keys stored with different
     dtypes (for example integers in one file and strings in the other) can
     still be joined.

     The normalisation is applied to copies: the caller's ``df1`` and ``df2``
     are left untouched, so later code still sees the original column values
     and dtypes.

     Args:
         df1: Left DataFrame; must contain ``column_name``.
         df2: Right DataFrame; must contain ``column_name``.
         column_name: Label of the column to match on.

     Returns:
         The inner merge of the two normalised frames on ``column_name``.
         The key column in the result holds the stripped text values.
     """
     # Work on copies so the normalised key never leaks back to the caller.
     left = df1.copy()
     right = df2.copy()

     # Ensure the column for merging has the same data type on both sides.
     left[column_name] = left[column_name].astype(str).str.strip()
     right[column_name] = right[column_name].astype(str).str.strip()

     # Find rows with exact matches in the specified column.
     return pd.merge(left, right, on=column_name, how='inner')
 def find_similar_texts(df1, df2, column_name, threshold=0.3):
     # Find rows with similar texts in the specified column, excluding exact matches
     similar_texts = []
     return plt.gcf()  # Return the matplotlib figure
 st.set_option('deprecation.showPyplotGlobalUse', False)
 def plot_correlation_matrix(df):
     # Filter for numeric columns, if the DataFrame has non-numeric columns
     numeric_df = df.select_dtypes(include=['number'])
            # Display exact matches
             st.header("Exact Matches Compare")
             for match in exact_matches:
+                warehouse_index = text_pair[0] + 2
+                industry_index = text_pair[1] + 2
                 st.write(f"Row {match[0]} in warehouse item stocks is exactly the same as Row {match[1]} in industry item stocks:")
                 st.write(f"Warehouse: {match[2]}")
                 st.write(f"Industry: {match[3]}")
             # Display similar texts
             st.header("Similar (but Not Same) Texts")
             for text_pair in similar_texts:
+                warehouse_index = text_pair[0] + 2
+                industry_index = text_pair[1] + 2
                 st.write(f"Row {text_pair[0]} in warehouse item stocks is similar to Row {text_pair[1]} in industry item stocks:")
                 st.write(f"Warehouse: {text_pair[2]}")
                 st.write(f"Industry: {text_pair[3]}")
             plot_correlation_matrix(industry_df)
 if __name__ == "__main__":
+    main()