# Quran Finder — Hugging Face Spaces app (source recovered from the Space page;
# the Space build log reported "Build error" at the time of capture).
# --- One-time startup: load the embedding model, the verse table, and the
# --- precomputed verse embeddings. Timestamped prints act as a crude boot log.
import pickle
import time
from datetime import datetime
from pathlib import Path

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer

print("load model start")
print(datetime.fromtimestamp(time.time()))
# E5 instruct model: multilingual retrieval with instruction-prefixed queries.
model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
print("load model end")
print(datetime.fromtimestamp(time.time()))

# Full English Quran; assumed columns include 'sura', 'aya', 'text'
# (see how find() indexes it) — TODO confirm against quran-eng.csv.
quran = pd.read_csv('quran-eng.csv', delimiter=",")
print("load quran eng")
print(datetime.fromtimestamp(time.time()))

# NOTE(security): pickle.load executes arbitrary code — only ship trusted files
# with this Space. The `with` block closes the handle (the original opened the
# file into a module-level `file` variable and never closed it).
with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as f:
    document_embeddings = pickle.load(f)
print("load quran embedding")
print(datetime.fromtimestamp(time.time()))
def make_clickable_both(val):
    """Render a ``label#url`` string as an HTML anchor.

    *val* must contain exactly one ``#`` separating the link text from the
    URL; both parts are echoed to stdout for debugging before the anchor
    tag is returned.
    """
    label, link = val.split('#')
    print(label + "\n")
    print(link + "\n")
    return f'<a href="{link}">{label}</a>'
def find(query):
    """Return the three Quran verses most semantically similar to *query*.

    The query is embedded with the module-level E5-instruct model, scored
    against the precomputed ``document_embeddings`` by dot product (both are
    normalized, so this is cosine similarity), and the top-3 matching verses
    are looked up in the full English ``quran`` table.

    Parameters
    ----------
    query : str
        Free-text search query, any language the model supports.

    Returns
    -------
    pandas.DataFrame
        One row per hit, with a single ``text`` column whose value is an
        HTML anchor to the verse's tafsir page on quran.com followed by a
        ``(QS. sura:aya)`` citation.
    """
    print("start")
    print(datetime.fromtimestamp(time.time()))

    def get_detailed_instruct(task_description: str, query: str) -> str:
        # E5-instruct models expect each query prefixed with a one-sentence
        # task description in exactly this format.
        return f'Instruct: {task_description}\nQuery: {query}'

    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [get_detailed_instruct(task, query)]

    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    print("embed query")
    print(datetime.fromtimestamp(time.time()))

    # Normalized embeddings => dot product is cosine similarity; x100 only
    # makes the scores easier to eyeball.
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("count similarities")
    print(datetime.fromtimestamp(time.time()))

    # Load the verse-split dataframe. The context manager closes the handle
    # even if unpickling fails (the original leaked one handle per request).
    # NOTE(review): reloading this pickle on every query is slow — consider
    # hoisting it to module load alongside document_embeddings.
    with open('quran-splitted.sav', 'rb') as f:
        quran_splitted = pickle.load(f)
    print("load quran")
    print(datetime.fromtimestamp(time.time()))

    # scores is 1 x N (single query); attach row 0 as a similarity column.
    quran_splitted['similarity'] = scores.tolist()[0]
    sorted_quran = quran_splitted.sort_values(by='similarity', ascending=False)
    print("sort by similarity")
    print(datetime.fromtimestamp(time.time()))

    # Collect the full verse rows for the top-3 split-text hits.
    results = pd.DataFrame()
    for rank in range(3):
        hit = sorted_quran.iloc[rank]
        verse = quran.loc[(quran['sura'] == hit['sura']) & (quran['aya'] == hit['aya'])]
        results = pd.concat([results, verse])
    print("collect results")
    print(datetime.fromtimestamp(time.time()))

    # Link each verse to its Ibn Kathir tafsir page. NOTE(review): "tafisr"
    # in the URL slug looks like a typo but is kept verbatim — verify the
    # slug quran.com actually serves before "fixing" it.
    url = ('https://quran.com/' + results['sura'].astype(str) + ':'
           + results['aya'].astype(str) + '/tafsirs/en-tafisr-ibn-kathir')
    results['text'] = ('<a href="' + url + '">' + results['text'] + '</a>'
                       + ' (QS. ' + results['sura'].astype(str) + ':'
                       + results['aya'].astype(str) + ')')
    return results.drop(columns=['sura', 'aya'])
# Gradio UI: one textbox feeding find(); output is a dataframe whose single
# column is rendered as markdown so the anchor tags stay clickable.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[gr.Dataframe(headers=['text'], datatype=["markdown"], wrap=True)],
    cache_examples="lazy",
    examples=[
        ["law of inheritance in islam"],
        ["tunjukilah jalan yang lurus"],
        ["سليمان"],
    ],
    title="Quran Finder",
)

if __name__ == "__main__":
    demo.launch()