Sentinel-AI-Web-Search-Test-v2-Testing-Score

Build error

App Files Files Community

Shreyas094 committed on Jul 8, 2024

Commit

0d127c5

verified ·

1 Parent(s): d8b711d

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -22

app.py CHANGED Viewed

@@ -273,12 +273,16 @@ def rank_search_results(titles, summaries, model):
     try:
         ranks_str = generate_chunked_response(model, ranking_prompt)
         ranks = [float(rank.strip()) for rank in ranks_str.split(',') if rank.strip()]
-        # Check if we have the correct number of ranks
         if len(ranks) != len(titles):
             print(f"Warning: Number of ranks ({len(ranks)}) does not match number of titles ({len(titles)})")
-            print(f"Model output: {ranks_str}")
             return list(range(1, len(titles) + 1))
         return ranks
@@ -295,12 +299,6 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
     model = get_model(temperature, top_p, repetition_penalty)
     embed = get_embeddings()
-    # Check if the FAISS database exists
-    if os.path.exists("faiss_database"):
-        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-    else:
-        database = None
     if web_search:
         search_results = google_search(question)
@@ -323,6 +321,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
         if not processed_results:
             return "No valid search results found."
         # Rank the results
         titles = [r["title"] for r in processed_results]
         summaries = [r["summary"] for r in processed_results]
@@ -332,6 +332,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
             print(f"Error in ranking results: {str(e)}. Using default ranking.")
             ranks = list(range(1, len(processed_results) + 1))
         # Update Vector DB
         current_date = datetime.now().strftime("%Y-%m-%d")
         update_vector_db_with_search_results(processed_results, ranks, current_date)
@@ -416,32 +418,45 @@ def update_vectors(files, use_recursive_splitter):
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
-def update_vector_db_with_search_results(search_results, summaries, ranks):
     embed = get_embeddings()
-    database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True) if os.path.exists("faiss_database") else FAISS.from_documents([], embed)
-    current_date = datetime.now().strftime("%Y-%m-%d")
     documents = []
-    for result, summary, rank in zip(search_results, summaries, ranks):
-        if summary:  # Only create a document if there's a summary
             doc = Document(
-                page_content=summary,
                 metadata={
                     "search_date": current_date,
-                    "search_title": result["title"],
-                    "search_content": result["text"],
-                    "search_summary": summary,
                     "rank": rank
                 }
             )
             documents.append(doc)
-    if documents:  # Only update the database if there are documents to add
-        database.add_documents(documents)
-        database.save_local("faiss_database")
-    else:
         print("No valid documents to add to the database.")
 def export_vector_db_to_excel():
     embed = get_embeddings()

     try:
         ranks_str = generate_chunked_response(model, ranking_prompt)
+        print(f"Model output for ranking: {ranks_str}")
+        if not ranks_str.strip():
+            print("Model returned an empty string for ranking.")
+            return list(range(1, len(titles) + 1))
         ranks = [float(rank.strip()) for rank in ranks_str.split(',') if rank.strip()]
         if len(ranks) != len(titles):
             print(f"Warning: Number of ranks ({len(ranks)}) does not match number of titles ({len(titles)})")
             return list(range(1, len(titles) + 1))
         return ranks
     model = get_model(temperature, top_p, repetition_penalty)
     embed = get_embeddings()
     if web_search:
         search_results = google_search(question)
         if not processed_results:
             return "No valid search results found."
+        print(f"Number of processed results: {len(processed_results)}")
         # Rank the results
         titles = [r["title"] for r in processed_results]
         summaries = [r["summary"] for r in processed_results]
             print(f"Error in ranking results: {str(e)}. Using default ranking.")
             ranks = list(range(1, len(processed_results) + 1))
+        print(f"Number of ranks: {len(ranks)}")
         # Update Vector DB
         current_date = datetime.now().strftime("%Y-%m-%d")
         update_vector_db_with_search_results(processed_results, ranks, current_date)
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
+def update_vector_db_with_search_results(search_results, ranks, current_date):
     embed = get_embeddings()
     documents = []
+    for result, rank in zip(search_results, ranks):
+        if result.get("summary"):
             doc = Document(
+                page_content=result["summary"],
                 metadata={
                     "search_date": current_date,
+                    "search_title": result.get("title", ""),
+                    "search_content": result.get("content", ""),
+                    "search_summary": result["summary"],
                     "rank": rank
                 }
             )
             documents.append(doc)
+    if not documents:
         print("No valid documents to add to the database.")
+        return
+    texts = [doc.page_content for doc in documents]
+    metadatas = [doc.metadata for doc in documents]
+    print(f"Number of documents to embed: {len(texts)}")
+    print(f"First document text: {texts[0][:100]}...")  # Print first 100 characters of the first document
+    try:
+        if os.path.exists("faiss_database"):
+            database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+            database.add_texts(texts, metadatas=metadatas)
+        else:
+            database = FAISS.from_texts(texts, embed, metadatas=metadatas)
+        database.save_local("faiss_database")
+        print("Database updated successfully.")
+    except Exception as e:
+        print(f"Error updating database: {str(e)}")
 def export_vector_db_to_excel():
     embed = get_embeddings()