| from typing import Annotated |
|
|
| from fastapi import APIRouter, UploadFile, File, Body |
| from fastapi.responses import JSONResponse |
| import openai |
| import io |
| import os |
| from pypdf import PdfReader |
| from langchain.schema import Document |
| from langchain.chains.question_answering import load_qa_chain |
| from langchain.llms import OpenAI |
| from langchain.text_splitter import SentenceTransformersTokenTextSplitter |
| from db.vector_store import Store |
|
|
| router = APIRouter() |
| _chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff", verbose=True) |
|
|
|
|
| @router.get("/v1/datasets/{name}/answer") |
| async def answer(name: str, query: str): |
| """ Answer a question from the doc |
| Parameters: |
| - `name` of the doc. |
| - `query` to be answered. |
| Return: |
| a string answer to the query |
| """ |
| _db = Store.get_instance().get_collection(name) |
| print(query) |
| docs = _db.similarity_search_with_score(query=query) |
| print(docs) |
| answer = _chain.run(input_documents=[tup[0] for tup in docs], question=query) |
| return JSONResponse(status_code=200, content={"answer": answer, "file_score": [[f"{d[0].metadata['file']} : {d[0].metadata['page']}", d[1]] for d in docs]}) |
|
|
|
|
| @router.get("/v1/datasets") |
| async def list() -> list[dict]: |
| """ List all the datasets avaialble to query. |
| :return: |
| list of datasets |
| """ |
| |
| return Store.get_instance().list_collections() |