Spaces:

NCTCMumbai
/

NCTC_SlideFinder

Sleeping

App Files Community

NCTCMumbai committed on Mar 24, 2025

Commit

9198e15

verified ·

1 Parent(s): d54454e

Update view_ppt.py

Browse files

Files changed (1) hide show

view_ppt.py +111 -111

view_ppt.py CHANGED Viewed

@@ -1,111 +1,111 @@
-#TAB : VIEW PPT.py
-import requests
-from sentence_transformers import SentenceTransformer, CrossEncoder
-from sklearn.metrics.pairwise import cosine_similarity
-import os
-import shutil
-# Local cache directory for downloaded files
-LOCAL_CACHE_DIR = "local_cache"
-os.makedirs(LOCAL_CACHE_DIR, exist_ok=True)
-# Function to download a file from OneDrive to the local cache
-def download_file_from_onedrive(file_path, file_id, headers):
-    local_file_path = os.path.join(LOCAL_CACHE_DIR, os.path.basename(file_path))
-    if not os.path.exists(local_file_path):  # Avoid re-downloading
-        download_url = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}/content"
-        response = requests.get(download_url, headers=headers)
-        if response.status_code != 200:
-            raise ValueError(f"Failed to download file {file_path}. Error: {response.text}")
-        with open(local_file_path, "wb") as f:
-            f.write(response.content)
-        print(f"✅ Downloaded: {file_path} -> {local_file_path}")
-    return local_file_path
-# Function to search PPTs
-def search_ppts(query, num_results):
-    global df
-    gr.Info("Searching the relevant PPTs .")
-    # Generate query embedding
-    query_embedding = embedding_model.encode(query).tolist()
-    # Filter the DataFrame to include only rows where Unique_Slide_ID ends with "slide_1"
-    df1 = df[df['Unique_Slide_ID'].str.endswith("slide_1", na=False)]
-    # Compute cosine similarity scores
-    df1['similarity'] = df1['Short_Summary_Embedding'].apply(
-        lambda x: cosine_similarity([query_embedding], [eval(x)])[0][0]
-    )
-    # Sort by cosine similarity score
-    df1 = df1.sort_values(by='similarity', ascending=False)
-    # Get top N results for reranking
-    top_n = min(50, len(df1))  # Take top 50 results for reranking
-    top_results = df1.head(top_n)
-    # Prepare input pairs for cross-encoder reranking
-    pairs = [(query, row['Short_Summary']) for _, row in top_results.iterrows()]
-    # Rerank using cross-encoder
-    gr.Info("Doing Semantic Reranking for most appropriate results ")
-    rerank_scores = cross_encoder.predict(pairs)
-    top_results = top_results.copy()  # Avoid SettingWithCopyWarning
-    top_results['rerank_score'] = rerank_scores
-    # Sort by rerank score
-    top_results = top_results.sort_values(by='rerank_score', ascending=False)
-    print(top_results)
-    # Prepare results
-    results = []
-    gr.Info('Downloading PPT images and ppt')
-    print('Downloading PPT images and ppt')
-    for _, row in top_results.head(num_results).iterrows():
-        # Download slide image locally
-        slide_image_path = download_file_from_onedrive(
-            row['Thumbnail_File_Path'], row['Thumbnail_File_ID'], headers
-        )
-        # Download full PPT locally
-        ppt_download_link = download_file_from_onedrive(
-            row['Full_PPT_File_Path'], row['Full_PPT_File_ID'], headers
-        )
-        title = row['Suitable_Title']
-        owner = row['PPT_Owner']
-        category = row['Slide_Category']
-        summary = row['Short_Summary']
-        results.append({
-            "image": slide_image_path,
-            "title": title,
-            "owner": owner,
-            "category": category,
-            "summary": summary,
-            "download_link": ppt_download_link
-        })
-    print("downloading complete ")
-    # Update visibility of rows
-    visible_rows = min(len(results), num_results)
-    row_updates = []
-    row_updates = []
-    for i in range(20):
-        if i < len(results):
-            result = results[i]
-            row_updates.extend([
-                gr.update(visible=True),  # ✅ Make the row visible
-                gr.update(value=result["image"], visible=True),
-                gr.update(value=f"<b>Title:</b> {result['title']}<br><b>Owner:</b> {result['owner']}<br><b>Category:</b> {result['category']}", visible=True),
-                gr.update(value=result["summary"], visible=True),
-                gr.update(value=result["download_link"], visible=True),
-            ])
-        else:
-            row_updates.extend([gr.update(visible=False)] * 5)  # row + 4 components
-    return row_updates

+#TAB : VIEW PPT.py
+import requests
+from sentence_transformers import SentenceTransformer, CrossEncoder
+from sklearn.metrics.pairwise import cosine_similarity
+import os
+import shutil
+import gradio as gr
+# Local cache directory for downloaded files
+LOCAL_CACHE_DIR = "local_cache"
+os.makedirs(LOCAL_CACHE_DIR, exist_ok=True)
+# Function to download a file from OneDrive to the local cache
+def download_file_from_onedrive(file_path, file_id, headers):
+    local_file_path = os.path.join(LOCAL_CACHE_DIR, os.path.basename(file_path))
+    if not os.path.exists(local_file_path):  # Avoid re-downloading
+        download_url = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}/content"
+        response = requests.get(download_url, headers=headers)
+        if response.status_code != 200:
+            raise ValueError(f"Failed to download file {file_path}. Error: {response.text}")
+        with open(local_file_path, "wb") as f:
+            f.write(response.content)
+        print(f"✅ Downloaded: {file_path} -> {local_file_path}")
+    return local_file_path
+# Function to search PPTs
+def search_ppts(query, num_results):
+    global df
+    gr.Info("Searching the relevant PPTs .")
+    # Generate query embedding
+    query_embedding = embedding_model.encode(query).tolist()
+    # Filter the DataFrame to include only rows where Unique_Slide_ID ends with "slide_1"
+    df1 = df[df['Unique_Slide_ID'].str.endswith("slide_1", na=False)]
+    # Compute cosine similarity scores
+    df1['similarity'] = df1['Short_Summary_Embedding'].apply(
+        lambda x: cosine_similarity([query_embedding], [eval(x)])[0][0]
+    )
+    # Sort by cosine similarity score
+    df1 = df1.sort_values(by='similarity', ascending=False)
+    # Get top N results for reranking
+    top_n = min(50, len(df1))  # Take top 50 results for reranking
+    top_results = df1.head(top_n)
+    # Prepare input pairs for cross-encoder reranking
+    pairs = [(query, row['Short_Summary']) for _, row in top_results.iterrows()]
+    # Rerank using cross-encoder
+    gr.Info("Doing Semantic Reranking for most appropriate results ")
+    rerank_scores = cross_encoder.predict(pairs)
+    top_results = top_results.copy()  # Avoid SettingWithCopyWarning
+    top_results['rerank_score'] = rerank_scores
+    # Sort by rerank score
+    top_results = top_results.sort_values(by='rerank_score', ascending=False)
+    print(top_results)
+    # Prepare results
+    results = []
+    gr.Info('Downloading PPT images and ppt')
+    print('Downloading PPT images and ppt')
+    for _, row in top_results.head(num_results).iterrows():
+        # Download slide image locally
+        slide_image_path = download_file_from_onedrive(
+            row['Thumbnail_File_Path'], row['Thumbnail_File_ID'], headers
+        )
+        # Download full PPT locally
+        ppt_download_link = download_file_from_onedrive(
+            row['Full_PPT_File_Path'], row['Full_PPT_File_ID'], headers
+        )
+        title = row['Suitable_Title']
+        owner = row['PPT_Owner']
+        category = row['Slide_Category']
+        summary = row['Short_Summary']
+        results.append({
+            "image": slide_image_path,
+            "title": title,
+            "owner": owner,
+            "category": category,
+            "summary": summary,
+            "download_link": ppt_download_link
+        })
+    print("downloading complete ")
+    # Update visibility of rows
+    visible_rows = min(len(results), num_results)
+    row_updates = []
+    row_updates = []
+    for i in range(20):
+        if i < len(results):
+            result = results[i]
+            row_updates.extend([
+                gr.update(visible=True),  # ✅ Make the row visible
+                gr.update(value=result["image"], visible=True),
+                gr.update(value=f"<b>Title:</b> {result['title']}<br><b>Owner:</b> {result['owner']}<br><b>Category:</b> {result['category']}", visible=True),
+                gr.update(value=result["summary"], visible=True),
+                gr.update(value=result["download_link"], visible=True),
+            ])
+        else:
+            row_updates.extend([gr.update(visible=False)] * 5)  # row + 4 components
+    return row_updates