Spaces:

eaedk
/

RAG-FastAPI

Sleeping

App Files Files Community

eaedk commited on Aug 21, 2025

Commit

fab7c40

1 Parent(s): 5f790a2

app

Browse files

Files changed (1) hide show

_app.py +206 -0

_app.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import os
+import chromadb
+from dotenv import load_dotenv
+from uuid import uuid4
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.middleware.cors import CORSMiddleware
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_openai import OpenAIEmbeddings
+from langchain.chat_models import init_chat_model
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_chroma import Chroma
+import uvicorn
+# ----------------------
+# Configuration and Setup
+# ----------------------
+# Load environment variables from .env file
+load_dotenv()
+# Directories for file upload and persistent storage of Chroma vector database
+UPLOAD_DIR = "uploads"
+CHROMA_DIR = "chroma_db"
+# Set model versions for LLM and embeddings
+LLM = "gpt-4o-mini-2024-07-18"
+EMBEDDING_MODEL = "text-embedding-3-small"
+# Ensure necessary directories exist
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+os.makedirs(CHROMA_DIR, exist_ok=True)
+# Set OpenAI API key from environment variables
+os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
+# Initialize a persistent client for Chroma, specifying where the data is stored
+client = chromadb.PersistentClient(path=CHROMA_DIR)
+# FastAPI application setup
+app = FastAPI()
+# Enable CORS (Cross-Origin Resource Sharing) for all origins, methods, and headers
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ----------------------
+# LangChain Initialization
+# ----------------------
+# Initialize the embedding model using OpenAI's API
+embedding = OpenAIEmbeddings(model=EMBEDDING_MODEL)
+# Initialize the language model (LLM) using OpenAI's API (with temperature for creativity)
+llm = init_chat_model(model=LLM, model_provider="openai", temperature=0)
+# Text splitter to split documents into manageable chunks (for efficient processing)
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=1200,
+    chunk_overlap=50,
+    separators=["\n\n", "\n", ".", " ", ""]
+)
+# Set up Chroma vector store to store document embeddings and their metadata
+vectorstore = Chroma(
+    client=client,
+    persist_directory=CHROMA_DIR,
+    embedding_function=embedding,
+    collection_name="legal_docs"
+)
+# Define the prompt template that will be used in the LLM for querying with context
+prompt_template = """
+Tu es un assistant utile qui réponds en français de manière claire et concise.
+Réponds uniquement en utilisant le contexte fourni.
+Si tu ne sais pas, dis "Je ne sais pas".
+contexte : {context}
+question : {question}
+answer :
+"""
+# Initialize the prompt template with variables
+prompt = PromptTemplate(
+    input_variables=["question", "context"],
+    template=prompt_template,
+)
+# Function to format documents for easier reading (used for retriever output)
+def format_docs(docs):
+    return "\n\n".join([f"(Page {d.metadata.get('page','?')}) {d.page_content}" for d in docs])
+# Set up the retriever to pull relevant documents from the vector store based on a query
+retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
+# Define the QA chain that links together the retriever, document formatting, and LLM for querying
+qa_chain = (
+    {
+        "context": retriever | format_docs,
+        "question": RunnablePassthrough(),
+    }
+    | prompt
+    | llm
+    | StrOutputParser()
+)
+# ----------------------
+# Document Management Functions
+# ----------------------
+# Function to add a PDF document to the vector store (embedding and splitting into chunks)
+def add_pdf_to_vectorstore(file_path):
+    # Load the PDF file
+    loader = PyPDFLoader(file_path)
+    documents = loader.load()
+    # Split the document into smaller chunks
+    docs = text_splitter.split_documents(documents)
+    # Generate a unique ID for each chunk
+    uuids = [str(uuid4()) for _ in range(len(docs))]
+    print(f"Number of documents split: {len(docs)}")
+    # Add documents to the vector store (Chroma)
+    vectorstore.add_documents(documents=docs, ids=uuids)
+# ----------------------
+# FastAPI Routes
+# ----------------------
+# Route to upload a PDF file and add its content to the vector store
+@app.post("/upload/")
+async def upload_pdf(file: UploadFile = File(...)):
+    # Check if the uploaded file is a PDF
+    if not file.filename.endswith(".pdf"):
+        raise HTTPException(status_code=400, detail="Seuls les fichiers PDF sont acceptés.")
+    # Save the uploaded file to disk
+    file_path = os.path.join(UPLOAD_DIR, file.filename)
+    with open(file_path, "wb") as buffer:
+       buffer.write(await file.read())
+    # Add the PDF document to the vector store
+    add_pdf_to_vectorstore(file_path)
+    # Return a success message
+    content = {"message": f"Fichier {file.filename} ajouté à la base de connaissances."}
+    print(f"{content=}")
+    return JSONResponse(content=content)
+# Route to interact with the assistant via a chat-like interface
+@app.get("/chat/")
+async def chat(message: str):
+    # Use the QA chain to get a response from the assistant
+    response = qa_chain.invoke(message)
+    # Return the response from the assistant
+    print(f"{response=}")
+    return {"answer": response}
+# ----------------------
+# Streaming Response for Chat
+# ----------------------
+# This function will simulate the streaming of the response.
+async def stream_chat_response(message: str):
+    # Initialize the chat model (this could be done outside the function if it's expensive)
+    # response_parts = []
+    # print("Streaming API response:\n")
+    async for part in qa_chain.astream(message):
+        # response_parts.append(part)  # Collect all parts of the response
+        # Yield each part as a chunk for streaming to the client
+        print(part, end="", flush=True)
+        yield part
+    # # Final join to return the complete response after streaming
+    # full_response = "".join(response_parts)
+    # yield full_response
+# FastAPI endpoint for streaming chat responses
+@app.get("/chat_stream/")
+async def chat_stream(message: str):
+    """
+    Endpoint to stream chat responses progressively.
+    """
+    # Return a StreamingResponse that will stream the response from the generator
+    return StreamingResponse(stream_chat_response(message), media_type="text/plain")
+# ----------------------
+# Start the FastAPI app using Uvicorn
+# ----------------------
+if __name__ == "__main__":
+    # Run the FastAPI application with auto-reloading enabled
+    uvicorn.run(app, host="0.0.0.0", port=8000)