Spaces:
Sleeping
Sleeping
add search title
Browse files- src/mcp/server.py +40 -0
src/mcp/server.py
CHANGED
|
@@ -9,6 +9,13 @@ from dotenv import load_dotenv
|
|
| 9 |
import os
|
| 10 |
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
LOG_FILE = os.path.join(LOG_DIR, "Agents.log")
|
| 13 |
logging.basicConfig(
|
| 14 |
filename=LOG_FILE,
|
|
@@ -123,6 +130,39 @@ async def summarize_video_by_link(link: str):
|
|
| 123 |
})
|
| 124 |
return prompt
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
def sample_chunks(chunks, n=3):
|
| 127 |
"""Pick N evenly spaced chunks across the whole video."""
|
| 128 |
|
|
|
|
| 9 |
import os
|
| 10 |
|
| 11 |
|
| 12 |
+
from sentence_transformers import SentenceTransformer
|
| 13 |
+
import faiss
|
| 14 |
+
import numpy as np
|
| 15 |
+
from src.configs.config import TITLE_FAISS_INDEX_FILE, TITLE_EMBEDDINGS_FILE, EMBEDDING_MODEL, METADATA_FILE
|
| 16 |
+
import pandas as pd
|
| 17 |
+
|
| 18 |
+
|
| 19 |
LOG_FILE = os.path.join(LOG_DIR, "Agents.log")
|
| 20 |
logging.basicConfig(
|
| 21 |
filename=LOG_FILE,
|
|
|
|
| 130 |
})
|
| 131 |
return prompt
|
| 132 |
|
| 133 |
+
@mcp.prompt(
    name="summarize_doc_by_title",
    description="Summarize a document using its title"
)
async def summarize_doc_by_title(title_query: str):
    """Summarize the document whose title best matches ``title_query``.

    The title query is resolved to a document link via ``search_title``,
    and the summary itself is delegated to ``summarize_doc_by_link``.
    """
    best_match = search_title(title_query)
    return await summarize_doc_by_link(best_match["link"])
|
| 141 |
+
|
| 142 |
+
@mcp.prompt(
    name="summarize_document",
    description="Summarize a document by link or title. If the query contains 'https', it is treated as a link."
)
async def summarize_document(query: str):
    """Route ``query`` to the link-based or title-based summarizer.

    A query containing the substring ``"https"`` is passed unchanged to
    ``summarize_doc_by_link``; any other query is treated as a title and
    passed to ``summarize_doc_by_title``.
    """
    handler = summarize_doc_by_link if "https" in query else summarize_doc_by_title
    return await handler(query)
|
| 151 |
+
# search_title
|
| 152 |
+
# Lazily populated (index, model, titles, links) tuple shared by all calls.
_TITLE_SEARCH_RESOURCES = None


def _load_title_search_resources():
    """Load the title index, embedding model and metadata once, then reuse them."""
    global _TITLE_SEARCH_RESOURCES
    if _TITLE_SEARCH_RESOURCES is None:
        index = faiss.read_index(str(TITLE_FAISS_INDEX_FILE))
        model = SentenceTransformer(EMBEDDING_MODEL)
        metadata = pd.read_csv(METADATA_FILE)
        _TITLE_SEARCH_RESOURCES = (
            index,
            model,
            metadata["Nom du document"].tolist(),
            metadata["Lien"].tolist(),
        )
    return _TITLE_SEARCH_RESOURCES


def search_title(query):
    """Return the document whose title embedding is nearest to ``query``.

    The FAISS title index, the embedding model and the metadata CSV are
    loaded on first use and cached for the lifetime of the process, so
    changes to those files on disk are only picked up after a restart.

    Args:
        query: Free-text title to look up.

    Returns:
        A dict with keys ``"title"`` and ``"link"``, read from the
        ``"Nom du document"`` and ``"Lien"`` metadata columns at the row
        position returned by the index.

    Raises:
        LookupError: If the index returns no neighbour for the query.
    """
    index, model, titles, links = _load_title_search_resources()

    # FAISS operates on float32 matrices; make the dtype explicit.
    query_vec = np.asarray(
        model.encode([query], convert_to_tensor=False), dtype="float32"
    )
    _, neighbours = index.search(query_vec, k=1)
    best_idx = int(neighbours[0][0])

    # FAISS reports -1 when it has no result (e.g. an empty index); without
    # this guard, titles[-1] would silently return the last document.
    if best_idx < 0:
        raise LookupError(f"No document title matches query: {query!r}")

    return {"title": titles[best_idx], "link": links[best_idx]}
|
| 164 |
+
|
| 165 |
+
|
| 166 |
def sample_chunks(chunks, n=3):
|
| 167 |
"""Pick N evenly spaced chunks across the whole video."""
|
| 168 |
|