Spaces:

karani10
/

Smart_NoteBook

Sleeping

App Files Community

karani10 committed 7 days ago

Commit

7d25988

1 Parent(s): 86067d0

Add application file

Browse files

Files changed (3) hide show

Dockerfile +16 -0
app.py +4 -187
requirements.txt +2 -26

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py CHANGED Viewed

@@ -1,190 +1,7 @@
-import os
-import shutil
-import tempfile
-from contextlib import asynccontextmanager
-from typing import Annotated
-from dotenv import dotenv_values
-from fastapi import FastAPI, File, Form, HTTPException, UploadFile
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from helpers import (
-    generate_embedding_doc,
-    get_text_from_pdf,
-    run_rag_pipeline,
-    split_doc_chunks,
-)
-# --------------------------------------------------
-# CONFIG
-# --------------------------------------------------
-config = dotenv_values(".env")
-GROQ_API_KEY = config.get(
-    "GROQ_API_KEY",
-    os.getenv("GROQ_API_KEY", "")
-)
-if not GROQ_API_KEY:
-    raise RuntimeError("Missing GROQ_API_KEY")
-# --------------------------------------------------
-# SIMPLE LIST STORAGE: I Don't Use Chroma DB --> Deployment Causes
-# --------------------------------------------------
-chunked_documents = []
-# --------------------------------------------------
-# FASTAPI
-# --------------------------------------------------
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    print("API Started")
-    yield
-    print("API Stopped")
-app = FastAPI(
-    title="Simple RAG API",
-    lifespan=lifespan,
-)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# --------------------------------------------------
-# SCHEMAS
-# --------------------------------------------------
-class QueryRequest(BaseModel):
-    question: str
-    top_k: int = 5
-    rerank_top_k: int = 3
-# --------------------------------------------------
-# ROUTES
-# --------------------------------------------------
 @app.get("/")
-def home():
-    return {
-        "message": "RAG API Running"
-    }
-# --------------------------------------------------
-# UPLOAD PDF
-# --------------------------------------------------
-@app.post("/upload-pdf")
-async def upload_pdf(
-    file: Annotated[
-        UploadFile,
-        File(description="PDF file")
-    ],
-):
-    print("FILE SEND: ", file)
-    global chunked_documents
-    # -------------------------------
-    # CHECK PDF
-    # -------------------------------
-    if not file.filename.endswith(".pdf"):
-        raise HTTPException(
-            status_code=400,
-            detail="Only PDF allowed"
-        )
-    # -------------------------------
-    # SAVE TEMP PDF
-    # -------------------------------
-    with tempfile.NamedTemporaryFile(
-        delete=False,
-        suffix=".pdf"
-    ) as tmp:
-        shutil.copyfileobj(file.file, tmp)
-        tmp_path = tmp.name
-    try:
-        # -------------------------------
-        # EXTRACT TEXT
-        # -------------------------------
-        documents = get_text_from_pdf(tmp_path)
-        if not documents:
-            raise HTTPException(
-                status_code=400,
-                detail="No text found"
-            )
-        # -------------------------------
-        # CHUNKING
-        # -------------------------------
-        chunked_documents = split_doc_chunks(
-            documents
-        )
-        # -------------------------------
-        # GENERATE EMBEDDINGS
-        # -------------------------------
-        chunked_documents = generate_embedding_doc(
-            chunked_documents
-        )
-        return {
-            "message": "PDF indexed successfully",
-            "chunks": len(chunked_documents)
-        }
-    finally:
-        os.unlink(tmp_path)
-# --------------------------------------------------
-# QUERY
-# --------------------------------------------------
-@app.post("/query")
-def query(req: QueryRequest):
-    global chunked_documents
-    print("Question", req)
-    if not chunked_documents:
-        raise HTTPException(
-            status_code=400,
-            detail="Upload PDF first"
-        )
-    answer = run_rag_pipeline(
-        question=req.question,
-        chunked_documents=chunked_documents,
-        groq_api_key=GROQ_API_KEY,
-        top_k=req.top_k,
-        rerank_top_k=req.rerank_top_k,
-    )
-    return {
-        "question": req.question,
-        "answer": answer,
-    }

+from fastapi import FastAPI
+app = FastAPI()
 @app.get("/")
+def greet_json():
+    return {"Hello": "World!"}

requirements.txt CHANGED Viewed

@@ -1,26 +1,2 @@
-# RAG API — Python dependencies
-# Web framework
-fastapi>=0.111.0
-uvicorn[standard]>=0.29.0
-python-multipart>=0.0.9
-# Environment
-python-dotenv>=1.0.0
-# LLM
-groq>=0.9.0
-# Embeddings
-sentence-transformers>=3.0.0
-# PDF extraction
-pdfplumber>=0.11.0
-# Retrieval / ranking
-rank-bm25>=0.2.2
-scikit-learn>=1.4.0
-numpy>=1.26.0
-# Validation
-pydantic>=2.7.0


+fastapi
+uvicorn[standard]