Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from langchain_qdrant import QdrantVectorStore | |
| from qdrant_client import QdrantClient | |
| from qdrant_client.http.models import Distance, VectorParams | |
| from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode | |
| from qdrant_client import QdrantClient, models | |
| from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams | |
| from uuid import uuid4 | |
| from langchain_core.documents import Document | |
# Sample corpus as (page_content, source) pairs; the source value is stored
# under the "source" metadata key of each Document.
_SAMPLE_TEXTS = (
    (
        "I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
        "tweet",
    ),
    (
        "The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
        "news",
    ),
    (
        "Building an exciting new project with LangChain - come check it out!",
        "tweet",
    ),
    (
        "Robbers broke into the city bank and stole $1 million in cash.",
        "news",
    ),
    (
        "Wow! That was an amazing movie. I can't wait to see it again.",
        "tweet",
    ),
    (
        "Is the new iPhone worth the price? Read this review to find out.",
        "website",
    ),
    (
        "The top 10 soccer players in the world right now.",
        "website",
    ),
    (
        "LangGraph is the best framework for building stateful, agentic applications!",
        "tweet",
    ),
    (
        "The stock market is down 500 points today due to fears of a recession.",
        "news",
    ),
    (
        "I have a bad feeling I am going to get deleted :(",
        "tweet",
    ),
)

# Build the documents in the same order as the table above.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in _SAMPLE_TEXTS
]

# Keep the individual module-level names bound to the same objects that are
# stored in `documents`.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

# One random UUID string per document, in the same order as `documents`.
uuids = [str(uuid4()) for _ in documents]

# Alias for the same list object.
docs = documents
# Sparse embedding model (BM25) handed to the vector store below.
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

# Qdrant client in local mode, storing its data under the given path.
client = QdrantClient(path="tmp/langchain_qdrant")

COLLECTION_NAME = "my_documents"

# Because the client is backed by an on-disk path, the collection may already
# exist from a previous start. Creating it unconditionally fails in that case,
# and re-adding the samples would insert duplicates under fresh random UUIDs,
# so both steps run only when the collection is missing.
_collection_is_new = not client.collection_exists(COLLECTION_NAME)

if _collection_is_new:
    # Create a collection with a named dense vector and a named sparse vector.
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config={"dense": VectorParams(size=3072, distance=Distance.COSINE)},
        sparse_vectors_config={
            "sparse": SparseVectorParams(index=models.SparseIndexParams(on_disk=False))
        },
    )

# Vector store configured for sparse-only retrieval against the "sparse" vector.
qdrant = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.SPARSE,
    sparse_vector_name="sparse",
)

if _collection_is_new:
    # Seed the freshly created collection with the sample documents.
    qdrant.add_documents(documents=documents, ids=uuids)

app = FastAPI()
def get_data(query: str):
    """Search the vector store for *query* and return the hits as plain dicts.

    Each result of ``qdrant.similarity_search(query)`` is converted with
    ``model_dump()``. The ``"id"`` entry is dropped, and so is every metadata
    key that starts with an underscore.

    NOTE(review): no route decorator is visible on this function in this
    view; confirm how it is registered with ``app``.

    Args:
        query: Search text, e.g. "How much money did the robbers steal?".

    Returns:
        A dict of the form ``{"data": [...]}`` holding one cleaned dict per
        search hit, in the order returned by the vector store.
    """
    found_docs = []
    for hit in qdrant.similarity_search(query):
        doc = hit.model_dump()
        doc.pop("id", None)
        # startswith() is safe for an empty-string key, where indexing k[0]
        # would raise IndexError; non-string keys are kept untouched.
        doc["metadata"] = {
            key: value
            for key, value in doc["metadata"].items()
            if not (isinstance(key, str) and key.startswith("_"))
        }
        found_docs.append(doc)
    return {"data": found_docs}
def greet_json():
    """Return the static greeting payload ``{"Hello": "World!"}``."""
    greeting = {"Hello": "World!"}
    return greeting