ELHACHYMI committed on
Commit
a316fee
·
verified ·
1 Parent(s): 16c7847

add search title

Browse files
Files changed (1) hide show
  1. src/mcp/server.py +40 -0
src/mcp/server.py CHANGED
@@ -9,6 +9,13 @@ from dotenv import load_dotenv
9
  import os
10
 
11
 
 
 
 
 
 
 
 
12
  LOG_FILE = os.path.join(LOG_DIR, "Agents.log")
13
  logging.basicConfig(
14
  filename=LOG_FILE,
@@ -123,6 +130,39 @@ async def summarize_video_by_link(link: str):
123
  })
124
  return prompt
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  def sample_chunks(chunks, n=3):
127
  """Pick N evenly spaced chunks across the whole video."""
128
 
 
9
  import os
10
 
11
 
12
+ from sentence_transformers import SentenceTransformer
13
+ import faiss
14
+ import numpy as np
15
+ from src.configs.config import TITLE_FAISS_INDEX_FILE, TITLE_EMBEDDINGS_FILE, EMBEDDING_MODEL, METADATA_FILE
16
+ import pandas as pd
17
+
18
+
19
  LOG_FILE = os.path.join(LOG_DIR, "Agents.log")
20
  logging.basicConfig(
21
  filename=LOG_FILE,
 
130
  })
131
  return prompt
132
 
133
@mcp.prompt(
    name="summarize_doc_by_title",
    description="Summarize a document using its title"
)
async def summarize_doc_by_title(title_query: str):
    """Build the summary prompt for the document best matching a title.

    The title is resolved through ``search_title``; the "link" entry of the
    returned match is then handed to ``summarize_doc_by_link``, whose result
    is returned unchanged.

    Args:
        title_query: Free-text title used to look the document up.
    """
    # NOTE(review): search_title is a synchronous call made from a coroutine;
    # confirm that blocking here is acceptable for this server.
    matched_link = search_title(title_query)["link"]
    return await summarize_doc_by_link(matched_link)
141
+
142
@mcp.prompt(
    name="summarize_document",
    description="Summarize a document by link or title. If the query contains 'https', it is treated as a link."
)
async def summarize_document(query: str):
    """Dispatch a summary request to the link-based or title-based prompt.

    Args:
        query: Either a document link or a document title. Any query that
            contains the substring "https" is routed to
            ``summarize_doc_by_link``; everything else goes to
            ``summarize_doc_by_title``.

    Returns:
        Whatever the selected prompt coroutine returns.
    """
    # Substring test (not a prefix test), as stated in the prompt description.
    handler = summarize_doc_by_link if "https" in query else summarize_doc_by_title
    return await handler(query)
151
+ # search_title
152
_TITLE_EMBEDDING_MODEL = None  # lazily created SentenceTransformer, shared across calls


def _get_title_embedding_model():
    """Return the shared SentenceTransformer, constructing it on first use."""
    global _TITLE_EMBEDDING_MODEL
    if _TITLE_EMBEDDING_MODEL is None:
        # EMBEDDING_MODEL is a module constant, so one instance can serve
        # every query instead of being rebuilt on each call.
        _TITLE_EMBEDDING_MODEL = SentenceTransformer(EMBEDDING_MODEL)
    return _TITLE_EMBEDDING_MODEL


def search_title(query):
    """Find the document whose title is the nearest index neighbour of *query*.

    The FAISS index and the metadata CSV are re-read on every call so that
    regenerated files are picked up; only the embedding model is cached.

    Args:
        query: Free-text title (or title fragment) to look up.

    Returns:
        A dict with the keys "title" and "link", taken from the
        "Nom du document" and "Lien" columns of the metadata CSV at the row
        position returned by the index search.

    Raises:
        IndexError: If the index yields no neighbour, or a position that has
            no corresponding row in the metadata file.
    """
    # Load index and metadata
    index = faiss.read_index(str(TITLE_FAISS_INDEX_FILE))
    metadata = pd.read_csv(METADATA_FILE)
    titles = metadata["Nom du document"].tolist()
    links = metadata["Lien"].tolist()

    # Embed query; FAISS expects a float32 matrix, so make the dtype explicit.
    query_vec = _get_title_embedding_model().encode([query], convert_to_tensor=False)
    _, neighbours = index.search(np.asarray(query_vec, dtype="float32"), k=1)
    best_idx = int(neighbours[0][0])

    # FAISS reports "no result" as -1. Without this guard Python's negative
    # indexing would silently return the LAST metadata row as the match.
    if not 0 <= best_idx < len(titles):
        raise IndexError(
            f"No title match for query {query!r}: index returned position "
            f"{best_idx} for {len(titles)} metadata rows"
        )
    return {"title": titles[best_idx], "link": links[best_idx]}
164
+
165
+
166
  def sample_chunks(chunks, n=3):
167
  """Pick N evenly spaced chunks across the whole video."""
168