Spaces:
Sleeping
Sleeping
Update app.py
Browse files
added fast lookup for hadith
app.py
CHANGED
|
@@ -10,7 +10,9 @@ from datasets import Value
|
|
| 10 |
from datasets import Dataset
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
import re
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
pattern = r'"(.*?)"'
|
| 15 |
# This pattern captures any text enclosed in double quotes.
|
| 16 |
|
|
@@ -56,6 +58,22 @@ matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split(
|
|
| 56 |
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
|
| 57 |
matn_info = pd.merge(matn_info, books, on='Book_ID')
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
def value_to_hex(value):
    """Return the colour that ``cmap`` assigns to *value* as an uppercase ``#RRGGBB`` string.

    ``cmap`` is looked up from the enclosing module (it is not defined in
    this block; presumably a matplotlib colormap yielding RGBA floats in
    [0, 1] -- confirm against its definition). Only the first three
    components are used; each is scaled by 255 and truncated with ``int``.
    """
    rgba = cmap(value)
    # Index explicitly (rather than slicing) so a too-short result still
    # fails with the same IndexError as before.
    red, green, blue = (int(rgba[idx] * 255) for idx in range(3))
    return f"#{red:02X}{green:02X}{blue:02X}"
|
|
@@ -72,37 +90,52 @@ def get_node_info(node):
|
|
| 72 |
|
| 73 |
|
| 74 |
def visualize_isnad(taraf_num, yaxis):
|
|
|
|
| 75 |
taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
|
| 76 |
taraf_hadith = taraf['bookid_hadithid'].to_list()
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
taraf_author = taraf['Author'].to_list()
|
| 81 |
-
taraf_hadith_number = taraf['Hadith Number'].to_list()
|
| 82 |
-
lst_hadith = []
|
| 83 |
hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
|
| 84 |
isnad_hadith = isnad_info[hadith_cleaned]
|
| 85 |
-
for i in range(len(taraf_hadith_split)):
|
| 86 |
-
# This checks each hadith in the Taraf, is that book id hadith id found in each of the edges of isnad_info
|
| 87 |
-
#This loop get the end transmitter of each Hadith in the Taraf
|
| 88 |
-
isnad_in_hadith1 = isnad_hadith['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
|
| 89 |
-
isnad_hadith1 = isnad_hadith[isnad_in_hadith1][['Source', 'Destination']]
|
| 90 |
-
G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
|
| 91 |
-
node = [int(n) for n, d in G.out_degree() if d == 0]
|
| 92 |
-
for n in node:
|
| 93 |
-
gen_node = narrator_bios[narrator_bios['Rawi ID']==n]['Generation'].to_list()
|
| 94 |
-
if len(gen_node):
|
| 95 |
-
gen_node = gen_node[0]
|
| 96 |
-
else:
|
| 97 |
-
gen_node = -1
|
| 98 |
-
name_node = narrator_bios[narrator_bios['Rawi ID']==n]['Famous Name'].to_list()
|
| 99 |
-
if len(name_node):
|
| 100 |
-
name_node = name_node[0]
|
| 101 |
-
else:
|
| 102 |
-
name_node = 'فلان'
|
| 103 |
-
lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i], str(n), i])
|
| 104 |
-
df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
|
| 107 |
isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
|
| 108 |
isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'فلان')
|
|
|
|
| 10 |
from datasets import Dataset
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
import re
|
| 13 |
+
from collections import defaultdict
|
| 14 |
+
from huggingface_hub import hf_hub_download
|
| 15 |
+
|
| 16 |
pattern = r'"(.*?)"'
|
| 17 |
# This pattern captures any text enclosed in double quotes.
|
| 18 |
|
|
|
|
| 58 |
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
|
| 59 |
matn_info = pd.merge(matn_info, books, on='Book_ID')
|
| 60 |
|
| 61 |
+
|
| 62 |
+
from huggingface_hub import hf_hub_download
|
| 63 |
+
|
| 64 |
+
# Download the hadith lookup JSON file from the Hugging Face Hub and load it
|
| 65 |
+
file_path = hf_hub_download(
|
| 66 |
+
repo_id="FDSRashid/hadith_info", # read in fast lookup data structure
|
| 67 |
+
filename="hadith_lookup.json",
|
| 68 |
+
repo_type="dataset",
|
| 69 |
+
token=Secret_token,
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
with open(file_path, 'r') as f:
|
| 73 |
+
hadith_lookup_dict = json.load(f)
|
| 74 |
+
hadith_lookup = defaultdict(list, hadith_lookup_dict)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
def value_to_hex(value):
    """Return the colour that ``cmap`` assigns to *value* as an uppercase ``#RRGGBB`` string.

    ``cmap`` is looked up from the enclosing module (it is not defined in
    this block; presumably a matplotlib colormap yielding RGBA floats in
    [0, 1] -- confirm against its definition). Only the first three
    components are used; each is scaled by 255 and truncated with ``int``.
    """
    rgba = cmap(value)
    # Index explicitly (rather than slicing) so a too-short result still
    # fails with the same IndexError as before.
    red, green, blue = (int(rgba[idx] * 255) for idx in range(3))
    return f"#{red:02X}{green:02X}{blue:02X}"
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
def visualize_isnad(taraf_num, yaxis):
|
| 93 |
+
# Precompute filtered dataframes
|
| 94 |
taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
|
| 95 |
taraf_hadith = taraf['bookid_hadithid'].to_list()
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# Precompute hadiths where taraf_num exists
|
|
|
|
|
|
|
|
|
|
| 99 |
hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
|
| 100 |
isnad_hadith = isnad_info[hadith_cleaned]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
+
lst_hadith = []
|
| 103 |
+
|
| 104 |
+
for i, hadith_parts in enumerate(taraf_hadith):
|
| 105 |
+
# look up hadith for each bookid_hadithid
|
| 106 |
+
isnad_hadith1 = isnad_info.iloc[hadith_lookup[taraf_hadith[i]]][['Source', 'Destination']]
|
| 107 |
+
|
| 108 |
+
# Create graph and find end nodes
|
| 109 |
+
G = nx.from_pandas_edgelist(isnad_hadith1, source='Source', target='Destination', create_using=nx.DiGraph())
|
| 110 |
+
nodes = [int(n) for n, d in G.out_degree() if d == 0]
|
| 111 |
+
|
| 112 |
+
if nodes:
|
| 113 |
+
# Batch fetch data from narrator_bios for efficiency
|
| 114 |
+
bio_data = narrator_bios[narrator_bios['Rawi ID'].isin(nodes)]
|
| 115 |
+
|
| 116 |
+
for n in nodes:
|
| 117 |
+
gen_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Generation'].squeeze()
|
| 118 |
+
gen_node = gen_node if pd.notna(gen_node) else -1
|
| 119 |
+
|
| 120 |
+
name_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Famous Name'].squeeze()
|
| 121 |
+
name_node = name_node if pd.notna(name_node) else 'فلان'
|
| 122 |
+
|
| 123 |
+
# Append result for each node
|
| 124 |
+
lst_hadith.append([
|
| 125 |
+
taraf.iloc[i]['matn'],
|
| 126 |
+
gen_node,
|
| 127 |
+
name_node,
|
| 128 |
+
taraf.iloc[i]['Book_Name'],
|
| 129 |
+
taraf.iloc[i]['Author'],
|
| 130 |
+
taraf.iloc[i]['Hadith Number'],
|
| 131 |
+
str(n),
|
| 132 |
+
i
|
| 133 |
+
])
|
| 134 |
+
|
| 135 |
+
# Convert to DataFrame
|
| 136 |
+
df = pd.DataFrame(lst_hadith, columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
|
| 137 |
+
|
| 138 |
+
|
| 139 |
isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
|
| 140 |
isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
|
| 141 |
isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'فلان')
|