Spaces:

sivan22
/

sefaria-ref-finder

Sleeping

App Files Files Community

sivan22 committed on Jan 14, 2024

Commit

c1fdc9d

verified ·

1 Parent(s): e710286

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

app.py +27 -44
requirements.txt +0 -1
run.bat +1 -0
test42.db +2 -2
upload to hub.py +8 -0

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
 import streamlit as st
 from streamlit.logger import get_logger
-import gematriapy
 from timeit import default_timer as timer
 import sqlite3
-import ast
 import pandas as pd
 LOGGER = get_logger(__name__)
@@ -13,41 +11,21 @@ def preprocess(s:str)->str:
 @st.cache_resource
 def get_dfs()->object:
-    def to_daf_long(i:int)->str:
-        if i>0 and i<999:
-            i+=1
-            if  i%2 ==0:
-                return gematriapy.to_hebrew(i//2)+' עמוד א '
-            else:
-                return gematriapy.to_hebrew(i//2)+' עמוד ב'
-        return i
-    def gematria(i)->str:
-        if type(i) == int and i>0 and i<999:
-            return gematriapy.to_hebrew(i) + ' '
-        else: return i if type(i)==str else ''
     print('hello from get_dfs..')
     # //get the books table//
     # Connect to the database
     conn = sqlite3.connect('test42.db')
     # Query the database and retrieve the results
-    cursor = conn.execute("SELECT * FROM books")
     results = cursor.fetchall()
     # Convert the query results into a Pandas DataFrame
-    books = pd.DataFrame(list(results))
-    books.columns=list(map(lambda x: x[0], cursor.description))
-    # convert the array format string "["Section","Section"]"  that came from the database into a real array [Section,Section]
-    books['heSectionNames']=books['heSectionNames'].apply(lambda x: ast.literal_eval(x) if x is not None else [''] )
     # //get the texts table//
     # Query the database and retrieve the results
     cursor = conn.execute("SELECT * FROM texts")
     results = cursor.fetchall()
@@ -55,30 +33,30 @@ def get_dfs()->object:
     # Convert the query results into a Pandas DataFrame
     texts = pd.DataFrame(results)
     texts.columns=list(map(lambda x: x[0], cursor.description))
-    # // get the table that includes the titles//
-  # Query the database and retrieve the results
-    cursor = conn.execute("SELECT * FROM titles")
     results = cursor.fetchall()
     # Convert the query results into a Pandas DataFrame
-    titles = pd.DataFrame(results)
-    titles.columns=list(map(lambda x: x[0], cursor.description))
-    # merge the texts with the original books table (without the extra hebrew titles)
-    merged = pd.merge(texts,books,how='inner',left_on='bid',right_on='_id')
-    #convert the Talmud marks (1,2,3...) into dafs (א עמוד א..)
-    has_dafs = merged.loc[merged['heSectionNames'].apply(lambda x: True if len(x)>1 and x[-2] == 'דף' else False)==True]
-    merged.loc[has_dafs.index,'level2'] = has_dafs['level2'].map(to_daf_long)
-    # create a reference text, for example: רש"י על בראשית פרק א פסוק א
-    merged['ref_text_long']= merged['heTitle'] + ' ' + \
-        merged['heSectionNames'].map(lambda x:x[-4] + ' ' if len(x)>3 else "")  + merged['level4'].map(gematria) + \
-        merged['heSectionNames'].map(lambda x:x[-3] + ' ' if len(x)>2 else "")  + merged['level3'].map(gematria) + \
-        merged['heSectionNames'].map(lambda x:x[-2] + ' ' if len(x)>1 else "")  +  merged['level2'].map(gematria)
     titles_df = titles
-    texts_df = merged
     return titles_df, texts_df
@@ -137,8 +115,13 @@ def run():
         results = find_ref(titles_df,texts_df,user_input,top_k,num_of_results,algorithm)
         time = f"finished in {1e3*(timer()-time0):.1f} ms"
         st.write(time)
-        for result in results:
             st.write(result)
 if __name__ == "__main__":
     run()

 import streamlit as st
 from streamlit.logger import get_logger
 from timeit import default_timer as timer
 import sqlite3
 import pandas as pd
 LOGGER = get_logger(__name__)
 @st.cache_resource
 def get_dfs()->object:
     print('hello from get_dfs..')
     # //get the titles table//
     # Connect to the database
     conn = sqlite3.connect('test42.db')
     # Query the database and retrieve the results
+    cursor = conn.execute("SELECT * FROM titles")
     results = cursor.fetchall()
     # Convert the query results into a Pandas DataFrame
+    titles = pd.DataFrame(results)
+    titles.columns=list(map(lambda x: x[0], cursor.description))
     # //get the texts table//
     # Query the database and retrieve the results
     cursor = conn.execute("SELECT * FROM texts")
     results = cursor.fetchall()
     # Convert the query results into a Pandas DataFrame
     texts = pd.DataFrame(results)
     texts.columns=list(map(lambda x: x[0], cursor.description))
+    # //get the references database
+   # Query the database and retrieve the results
+    cursor = conn.execute("SELECT * FROM refs")
+    results = cursor.fetchall()
+    # Convert the query results into a Pandas DataFrame
+    refs = pd.DataFrame(results)
+    refs.columns=list(map(lambda x: x[0], cursor.description))
+   # Query the database and retrieve the results
+    cursor = conn.execute("SELECT * FROM books")
     results = cursor.fetchall()
     # Convert the query results into a Pandas DataFrame
+    books = pd.DataFrame(list(results))
+    books.columns=list(map(lambda x: x[0], cursor.description))
+    #merge the books and refs with the texts
+    merged = pd.merge(texts,books,how='inner',left_on='bid',right_on='_id')
+    texts_df = pd.merge(merged,refs,left_on='_id_x',right_on='tid')
     titles_df = titles
     return titles_df, texts_df
         results = find_ref(titles_df,texts_df,user_input,top_k,num_of_results,algorithm)
         time = f"finished in {1e3*(timer()-time0):.1f} ms"
         st.write(time)
+        buttons = []
+        for i, result in enumerate(results):
             st.write(result)
+            buttons.append(st.button("פתח " +result['ref'],i))
+            if buttons[i]:
+                st.write(texts_df.loc[texts_df['ref_text_long']==result['ref']][['heText','ref_text_long']])
 if __name__ == "__main__":
     run()

requirements.txt CHANGED Viewed

@@ -1,3 +1,2 @@
 gematriapy
 pandas
-rapidfuzz


1	gematriapy
2	pandas

run.bat ADDED Viewed

	@@ -0,0 +1 @@


1	+ streamlit run app.py

test42.db CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76e5c2fa4efd1ec73ec3babf569b831182849d1ce1e46fdadbd2a6e54aa538c4
-size 2063155200

 version https://git-lfs.github.com/spec/v1
+oid sha256:022710c8c0e53a525b01fb59f33b88605c0c6c2989b86340bf85c77cb16f8556
+size 2225819648

upload to hub.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from huggingface_hub import HfApi
+api = HfApi()
+api.upload_folder(
+    folder_path="./",
+    repo_id="sivan22/sefaria-ref-finder",
+    repo_type="space",
+)