Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -356,7 +356,6 @@ if __name__ == "__main__":
|
|
| 356 |
|
| 357 |
'''
|
| 358 |
|
| 359 |
-
|
| 360 |
import os
|
| 361 |
import gradio as gr
|
| 362 |
import requests
|
|
@@ -503,6 +502,21 @@ def get_retrieved_context(query: str, top_k=3):
|
|
| 503 |
return "
|
| 504 |
".join(context_parts)
|
| 505 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 506 |
# ----------- 5. LLM Chain Setup (Lightning AI generator) -----------
|
| 507 |
model = LitServeLLM(endpoint_url=LITSERVE_ENDPOINT)
|
| 508 |
|
|
@@ -764,4 +778,3 @@ if __name__ == "__main__":
|
|
| 764 |
})
|
| 765 |
|
| 766 |
demo.launch()
|
| 767 |
-
|
|
|
|
| 356 |
|
| 357 |
'''
|
| 358 |
|
|
|
|
| 359 |
import os
|
| 360 |
import gradio as gr
|
| 361 |
import requests
|
|
|
|
| 502 |
return "
|
| 503 |
".join(context_parts)
|
| 504 |
|
| 505 |
+
with mlflow.start_span("pinecone_query"):
|
| 506 |
+
start_time = time.time()
|
| 507 |
+
results = index.query(
|
| 508 |
+
namespace="rag-ns",
|
| 509 |
+
vector=query_embedding,
|
| 510 |
+
top_k=top_k,
|
| 511 |
+
include_metadata=True
|
| 512 |
+
)
|
| 513 |
+
mlflow.log_metric("pinecone_latency", time.time() - start_time)
|
| 514 |
+
mlflow.log_metric("retrieved_chunks", len(results['matches']))
|
| 515 |
+
|
| 516 |
+
context_parts = [match['metadata']['text'] for match in results['matches']]
|
| 517 |
+
return "
|
| 518 |
+
".join(context_parts)
|
| 519 |
+
|
| 520 |
# ----------- 5. LLM Chain Setup (Lightning AI generator) -----------
|
| 521 |
model = LitServeLLM(endpoint_url=LITSERVE_ENDPOINT)
|
| 522 |
|
|
|
|
| 778 |
})
|
| 779 |
|
| 780 |
demo.launch()
|
|
|