Spaces:

HikmaLabs
/

Isnad_Taraf_Visualizer

Sleeping

App Files Files Community

FDSRashid committed on Dec 22, 2024

Commit

f59ce12

verified ·

1 Parent(s): a77555e

Update app.py

Browse files

added fast lookup for hadith

Files changed (1) hide show

app.py +60 -27

app.py CHANGED Viewed

@@ -10,7 +10,9 @@ from datasets import Value
 from datasets import Dataset
 import matplotlib.pyplot as plt
 import re
 pattern = r'"(.*?)"'
 # This pattern captures anything enclosed in double quotes.
@@ -56,6 +58,22 @@ matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split(
 matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
 matn_info = pd.merge(matn_info, books, on='Book_ID')
 # Map `value` to an uppercase "#RRGGBB" hex color string using `cmap`.
 # `cmap` is not defined in this block; it is looked up from the enclosing scope.
 # NOTE(review): presumably a matplotlib colormap returning RGBA floats in [0, 1] -- confirm.
 def value_to_hex(value):
     # Only components 0-2 of the result are read below; any further component is ignored.
     rgba_color = cmap(value)
     # Each channel is scaled by 255 and truncated with int(); {:02X} zero-pads to two uppercase hex digits.
     return "#{:02X}{:02X}{:02X}".format(int(rgba_color[0] * 255), int(rgba_color[1] * 255), int(rgba_color[2] * 255))
@@ -72,37 +90,52 @@ def get_node_info(node):
 def visualize_isnad(taraf_num, yaxis):
     taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
     taraf_hadith = taraf['bookid_hadithid'].to_list()
-    taraf_matns = taraf['matn'].to_list()
-    taraf_hadith_split = [i.split('_') for i in taraf_hadith]
-    taraf_book = taraf['Book_Name'].to_list()
-    taraf_author = taraf['Author'].to_list()
-    taraf_hadith_number = taraf['Hadith Number'].to_list()
-    lst_hadith = []
     hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
     isnad_hadith = isnad_info[hadith_cleaned]
-    for i in range(len(taraf_hadith_split)):
-        # This checks each hadith in the Taraf, is that book id hadith id found in each of the edges of isnad_info
-        #This loop get the end transmitter of each Hadith in the Taraf
-        isnad_in_hadith1 = isnad_hadith['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
-        isnad_hadith1 = isnad_hadith[isnad_in_hadith1][['Source', 'Destination']]
-        G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
-        node = [int(n) for n, d in G.out_degree() if d == 0]
-        for n in node:
-            gen_node = narrator_bios[narrator_bios['Rawi ID']==n]['Generation'].to_list()
-            if len(gen_node):
-                gen_node = gen_node[0]
-            else:
-                gen_node = -1
-            name_node = narrator_bios[narrator_bios['Rawi ID']==n]['Famous Name'].to_list()
-            if len(name_node):
-                name_node = name_node[0]
-            else:
-                name_node = 'فلان'
-            lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i], str(n), i])
-    df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
     isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
     isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
     isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'فلان')

 from datasets import Dataset
 import matplotlib.pyplot as plt
 import re
+from collections import defaultdict
+from huggingface_hub import hf_hub_download
 pattern = r'"(.*?)"'
 # This pattern captures anything enclosed in double quotes.
 matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
 matn_info = pd.merge(matn_info, books, on='Book_ID')
+from huggingface_hub import hf_hub_download
+# Download the hadith lookup JSON file from the Hugging Face Hub and load it
+file_path = hf_hub_download(
+    repo_id="FDSRashid/hadith_info",  # read in fast lookup data structure
+    filename="hadith_lookup.json",
+    repo_type="dataset",
+    token=Secret_token,
+)
+with open(file_path, 'r') as f:
+    hadith_lookup_dict = json.load(f)
+hadith_lookup = defaultdict(list, hadith_lookup_dict)
 # Map `value` to an uppercase "#RRGGBB" hex color string using `cmap`.
 # `cmap` is not defined in this block; it is looked up from the enclosing scope.
 # NOTE(review): presumably a matplotlib colormap returning RGBA floats in [0, 1] -- confirm.
 def value_to_hex(value):
     # Only components 0-2 of the result are read below; any further component is ignored.
     rgba_color = cmap(value)
     # Each channel is scaled by 255 and truncated with int(); {:02X} zero-pads to two uppercase hex digits.
     return "#{:02X}{:02X}{:02X}".format(int(rgba_color[0] * 255), int(rgba_color[1] * 255), int(rgba_color[2] * 255))
 def visualize_isnad(taraf_num, yaxis):
+    # Precompute filtered dataframes
     taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
     taraf_hadith = taraf['bookid_hadithid'].to_list()
+    # Precompute hadiths where taraf_num exists
     hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
     isnad_hadith = isnad_info[hadith_cleaned]
+    lst_hadith = []
+    for i, hadith_parts in enumerate(taraf_hadith):
+        # look up hadith for each bookid_hadithid
+        isnad_hadith1 = isnad_info.iloc[hadith_lookup[taraf_hadith[i]]][['Source', 'Destination']]
+        # Create graph and find end nodes
+        G = nx.from_pandas_edgelist(isnad_hadith1, source='Source', target='Destination', create_using=nx.DiGraph())
+        nodes = [int(n) for n, d in G.out_degree() if d == 0]
+        if nodes:
+            # Batch fetch data from narrator_bios for efficiency
+            bio_data = narrator_bios[narrator_bios['Rawi ID'].isin(nodes)]
+            for n in nodes:
+                gen_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Generation'].squeeze()
+                gen_node = gen_node if pd.notna(gen_node) else -1
+                name_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Famous Name'].squeeze()
+                name_node = name_node if pd.notna(name_node) else 'فلان'
+                # Append result for each node
+                lst_hadith.append([
+                    taraf.iloc[i]['matn'],
+                    gen_node,
+                    name_node,
+                    taraf.iloc[i]['Book_Name'],
+                    taraf.iloc[i]['Author'],
+                    taraf.iloc[i]['Hadith Number'],
+                    str(n),
+                    i
+                ])
+    # Convert to DataFrame
+    df = pd.DataFrame(lst_hadith, columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
     isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
     isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
     isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'فلان')