Spaces:

aibmedia
/

aibsimilarityllm

Sleeping

App Files Files Community

aibmedia committed on Dec 15, 2024

Commit

5c1eb47

verified ·

1 Parent(s): e2fdf95

Update main.py

Browse files

Files changed (1) hide show

main.py +232 -21

main.py CHANGED Viewed

@@ -12,6 +12,7 @@ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
 API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
 API_URL1 = "https://api-inference.huggingface.co/models/sentence-transformers/all-mpnet-base-v2"
 API_URL2 = "https://api-inference.huggingface.co/models/sentence-transformers/all-roberta-large-v1"
@@ -50,12 +51,11 @@ async def server_1():
     query_sentence = "Obama's first name"
     duck_results = []
     try:
-        searchduck = DuckDuckGoSearchResults(output_format="list" ,max_results=20, num_results=20)
         duck_results = searchduck.invoke(query_sentence)
     except:
       print("An exception occurred")
     tool = Tool(
         name="google_search",
@@ -94,7 +94,7 @@ async def server_1():
     response1 =  requests.post(API_URL1, headers=headers, json=payload)
     response2 =  requests.post(API_URL2, headers=headers, json=payload)
     response3 =  requests.post(API_URL3, headers=headers, json=payload)
     print("type( response0.json() )")
     print(type(  response0.json() ))
     print(type(  response1.json() ))
@@ -102,13 +102,35 @@ async def server_1():
     print(type(  response3.json() ))
     if type(response0.json()) == list and type(response1.json()) == list and type(response2.json()) == list and type(response3.json()) == list :
         similarity_scores =  response0.json() + response1.json() + response2.json() + response3.json()
     else:
         similarity_scores = "There's an error in llm similarity search retrieval"
-        return all_results
     time.sleep(4)
-    print(similarity_scores)
-    print(type(similarity_scores))
     print("length")
     print(len(similarity_scores))
     key_index = 0
@@ -124,13 +146,13 @@ async def server_1():
         print(value_inlist)
         print("index ")
         print(key_index)
-        if key_index <= 8 :
             resp_list0.append(value_inlist)
-        if key_index <= 17 and key_index > 8 :
             resp_list1.append(value_inlist)
-        if key_index <= 26 and key_index > 17 :
             resp_list2.append(value_inlist)
-        if key_index <= 35 and key_index > 26 :
             resp_list3.append(value_inlist)
         key_index = key_index + 1
@@ -294,6 +316,204 @@ async def server_1():
     else:
         print("No reliable similarity found by 4 llms")
     # index_sorted0 = sorted0_with_index[:4]
     # index_sorted1 = sorted1_with_index[:4]
     # index_sorted2 = sorted2_with_index[:4]
@@ -313,15 +533,6 @@ async def server_1():
     # the top 3 indexes must be above .78 similarity score
     # the top 3 must have occurred 4 times or more in combined_indexes
-    time.sleep(4)
-    return all_results
-def threadserver():
-    print('hi')
-    os.system(' ./mxbai-embed-large-v1-f16.llamafile --server --nobrowser')
 if __name__ == '__main__':
   app.run(host='0.0.0.0', port=8080)

 API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
 API_URL1 = "https://api-inference.huggingface.co/models/sentence-transformers/all-mpnet-base-v2"
 API_URL2 = "https://api-inference.huggingface.co/models/sentence-transformers/all-roberta-large-v1"
     query_sentence = "Obama's first name"
     duck_results = []
     try:
+        searchduck = DuckDuckGoSearchResults(output_format="list" ,max_results=5, num_results=5)
         duck_results = searchduck.invoke(query_sentence)
     except:
       print("An exception occurred")
     tool = Tool(
         name="google_search",
     response1 =  requests.post(API_URL1, headers=headers, json=payload)
     response2 =  requests.post(API_URL2, headers=headers, json=payload)
     response3 =  requests.post(API_URL3, headers=headers, json=payload)
+    varcontinue_similarity = 0
     print("type( response0.json() )")
     print(type(  response0.json() ))
     print(type(  response1.json() ))
     print(type(  response3.json() ))
     if type(response0.json()) == list and type(response1.json()) == list and type(response2.json()) == list and type(response3.json()) == list :
         similarity_scores =  response0.json() + response1.json() + response2.json() + response3.json()
+        varcontinue_similarity = 1
     else:
         similarity_scores = "There's an error in llm similarity search retrieval"
+        return similarity_scores
     time.sleep(4)
+    result_processed = ""
+    ## if response is all list
+    if varcontinue_similarity == 1 :
+        # call processing with 10 google search result or 15 search results
+        if len(all_results) == 10 :
+            result_processed = process_similarity_10(all_results , similarity_scores )
+        if len(all_results) > 10 :
+            result_processed = process_similarity_15(all_results , similarity_scores )
+    # return all_results
+    return result_processed
def threadserver():
    """Print a greeting, then run the llamafile binary through the shell.

    The binary is started with ``--server --nobrowser`` via ``os.system``,
    so this call blocks until that command exits. Nothing is returned.
    """
    print('hi')
    launch_command = ' ./mxbai-embed-large-v1-f16.llamafile --server --nobrowser'
    os.system(launch_command)
+def process_similarity_15(web_results , similarity_scores):
+    # print(similarity_scores)
+    # print(type(similarity_scores))
     print("length")
     print(len(similarity_scores))
     key_index = 0
         print(value_inlist)
         print("index ")
         print(key_index)
+        if key_index <= 14 :
             resp_list0.append(value_inlist)
+        if key_index <= 29 and key_index > 14 :
             resp_list1.append(value_inlist)
+        if key_index <= 44 and key_index > 29 :
             resp_list2.append(value_inlist)
+        if key_index <= 59 and key_index > 44 :
             resp_list3.append(value_inlist)
         key_index = key_index + 1
     else:
         print("No reliable similarity found by 4 llms")
+    return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
def process_similarity_10(web_results, similarity_scores, *,
                          results_per_model=10, threshold=0.90, top_k=5):
    """Select, for each of four models, the best-scoring result indices.

    ``similarity_scores`` is a flat sequence holding the scores of four
    models back to back: the first ``results_per_model`` values belong to
    model 0, the next ``results_per_model`` to model 1, and so on. Values
    past the fourth slice are ignored.

    For every model the positions (0-based, relative to that model's slice)
    whose score is strictly greater than ``threshold`` are ranked from the
    highest score down. Tied scores keep their input order, so each position
    appears exactly once.

    The result is considered reliable only when at least two models each
    have two or more positions above the threshold. When reliable, each
    model's ranking is cut to ``top_k`` entries and, if shorter, padded by
    repeating its best position; a model with no qualifying score yields an
    empty list. When not reliable, every list is empty.

    Args:
        web_results: Unused by the selection; kept for interface
            compatibility with existing callers.
        similarity_scores: Flat iterable of numeric scores (four slices).
        results_per_model: Number of scores per model slice (default 10).
        threshold: Minimum score, exclusive, for a position to qualify
            (default 0.90).
        top_k: Length of each non-empty returned index list (default 5).

    Returns:
        A string of the four index lists joined by commas, e.g.
        ``"[2, 0, 2, 2, 2],[7, 1, 7, 7, 7],[],[]"``.
    """
    model_count = 4
    scores = list(similarity_scores)
    print("length")
    print(len(scores))

    # One slice of scores per model; a short input simply gives short/empty slices.
    per_model_scores = [
        scores[m * results_per_model:(m + 1) * results_per_model]
        for m in range(model_count)
    ]
    for m, model_scores in enumerate(per_model_scores):
        print("The Response list " + str(m) + " ")
        print(model_scores)

    ranked = [_ranked_indices_above(s, threshold) for s in per_model_scores]
    print("sorted0-3_with_index")
    for indices in ranked:
        print(indices)

    # Reliable only if at least two models agree there are >= 2 strong matches.
    models_with_two_hits = sum(1 for indices in ranked if len(indices) >= 2)
    if models_with_two_hits >= 2:
        print("continue variable set to true")
        selected = [_top_k_padded(indices, top_k) for indices in ranked]
        print("index_sorted0-1")
        for indices in selected:
            print(indices)
    else:
        print("No reliable similarity found by 4 llms")
        selected = [[] for _ in ranked]

    return ",".join(str(indices) for indices in selected)


def _ranked_indices_above(scores, threshold):
    """Return positions whose score exceeds ``threshold``, best score first."""
    qualifying = [i for i, score in enumerate(scores) if score > threshold]
    # sorted() is stable even with reverse=True, so ties keep input order.
    return sorted(qualifying, key=lambda i: scores[i], reverse=True)


def _top_k_padded(indices, top_k):
    """Cut ``indices`` to ``top_k`` entries, padding with the first one if short."""
    if not indices:
        return []
    picked = indices[:top_k]
    return picked + [picked[0]] * (top_k - len(picked))
     # index_sorted0 = sorted0_with_index[:4]
     # index_sorted1 = sorted1_with_index[:4]
     # index_sorted2 = sorted2_with_index[:4]
     # the top 3 indexes must be above .78 similarity score
     # the top 3 must have occurred 4 times or more in combined_indexes
 if __name__ == '__main__':
   app.run(host='0.0.0.0', port=8080)