PhysicsUncomplicated committed on
Commit
56ba893
·
verified ·
1 Parent(s): ef79da6

Upload 12 files

Browse files
Files changed (4) hide show
  1. .huggingface.yml +3 -0
  2. app.py +92 -133
  3. launch.sh +4 -0
  4. requirements.txt +1 -1
.huggingface.yml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ sdk: docker
2
+ app_file: app.py
3
+ entrypoint: ./launch.sh
app.py CHANGED
@@ -1,133 +1,92 @@
1
- """
2
- app.py — Bodha Vector API (Stable Hugging Face Version)
3
- -------------------------------------------------------
4
- Provides:
5
- • /vector-api/search → REST API with JSON output
6
- • / → Gradio UI for manual testing
7
- """
8
-
9
- import os
10
- import json
11
- import chromadb
12
- from fastapi import FastAPI, Request, Depends, HTTPException, status
13
- from fastapi.responses import JSONResponse
14
- from fastapi.middleware.cors import CORSMiddleware
15
- from fastapi.security import HTTPBasic, HTTPBasicCredentials
16
- import gradio as gr
17
- from typing import List
18
- from contextlib import asynccontextmanager
19
-
20
- # ========== CONFIG ==========
21
- os.environ["CHROMA_TELEMETRY"] = "FALSE" # disable telemetry warnings
22
-
23
- CHROMA_PATH = "/app/textbook_db" # use absolute path inside Hugging Face
24
- COLLECTION_NAME = "textbooks"
25
- USERNAME = "bodha"
26
- PASSWORD = "securepass123"
27
- TOP_K = 5
28
- # =============================
29
-
30
- # ---------- Initialize Chroma ----------
31
- try:
32
- chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
33
- collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
34
- except Exception as e:
35
- raise RuntimeError(f"❌ Failed to initialize Chroma: {e}")
36
-
37
- security = HTTPBasic()
38
-
39
- # ---------- Lifespan Startup ----------
40
- @asynccontextmanager
41
- async def lifespan(app: FastAPI):
42
- print("✅ Bodha Vector API is starting up...")
43
- try:
44
- print(f"📂 Loading collection: {COLLECTION_NAME}")
45
- print(f"📄 Total docs: {collection.count()}")
46
- except Exception as e:
47
- print(f"⚠️ Warning: Could not access collection: {e}")
48
- yield
49
- print("🛑 Shutting down Bodha Vector API...")
50
-
51
- app = FastAPI(lifespan=lifespan, title="Bodha Vector API")
52
-
53
- # ---------- Allow CORS for WordPress / JS ----------
54
- app.add_middleware(
55
- CORSMiddleware,
56
- allow_origins=["*"],
57
- allow_methods=["*"],
58
- allow_headers=["*"],
59
- )
60
-
61
- # ---------- Basic Auth ----------
62
- def verify_credentials(credentials: HTTPBasicCredentials = Depends(security)):
63
- if not (
64
- credentials.username == USERNAME and
65
- credentials.password == PASSWORD
66
- ):
67
- raise HTTPException(
68
- status_code=status.HTTP_401_UNAUTHORIZED,
69
- detail="Invalid credentials",
70
- headers={"WWW-Authenticate": "Basic"},
71
- )
72
- return credentials.username
73
-
74
- # ---------- API Endpoint ----------
75
- @app.post("/vector-api/search")
76
- async def search_vector(request: Request, user: str = Depends(verify_credentials)):
77
- try:
78
- data = await request.json()
79
- query = data.get("query", "").strip()
80
- if not query:
81
- return JSONResponse({"error": "No query provided."}, status_code=400)
82
-
83
- results = collection.query(query_texts=[query], n_results=TOP_K)
84
- docs = results.get("documents", [[]])[0]
85
- metas = results.get("metadatas", [[]])[0]
86
- dists = results.get("distances", [[]])[0]
87
-
88
- response_data = []
89
- for i, doc in enumerate(docs):
90
- meta = metas[i]
91
- score = 1 - dists[i] if dists else None # similarity score
92
- response_data.append({
93
- "rank": i + 1,
94
- "score": round(score, 4) if score else None,
95
- "class": meta.get("class", ""),
96
- "chapter": meta.get("chapter", ""),
97
- "pages": meta.get("pages", ""),
98
- "source": meta.get("source_pdf", ""),
99
- "text": doc
100
- })
101
- return JSONResponse(response_data)
102
-
103
- except Exception as e:
104
- return JSONResponse({"error": str(e)}, status_code=500)
105
-
106
- # ---------- Optional Gradio Interface ----------
107
- def gradio_query(query):
108
- if not query.strip():
109
- return "⚠️ Please enter a query."
110
- results = collection.query(query_texts=[query], n_results=TOP_K)
111
- docs = results.get("documents", [[]])[0]
112
- metas = results.get("metadatas", [[]])[0]
113
- dists = results.get("distances", [[]])[0]
114
- output = ""
115
- for i, doc in enumerate(docs):
116
- meta = metas[i]
117
- score = 1 - dists[i] if dists else None
118
- output += f"### Result {i+1} (score: {round(score,3) if score else 'N/A'})\n"
119
- output += f"**Class:** {meta.get('class','')} \n"
120
- output += f"**Source:** {meta.get('source_pdf','')} \n"
121
- output += f"**Pages:** {meta.get('pages','')} \n\n"
122
- output += f"{doc[:700]}...\n\n---\n\n"
123
- return output or "No results found."
124
-
125
- demo = gr.Interface(
126
- fn=gradio_query,
127
- inputs=gr.Textbox(label="Ask your Physics/Science question"),
128
- outputs="markdown",
129
- title="🔍 Bodha Textbook Search",
130
- description="Search across NCERT (Class 6–12) & H.C. Verma textbooks"
131
- )
132
-
133
- app = gr.mount_gradio_app(app, demo, path="/")
 
1
+ """
2
+ app.py — Bodha Vector API for Hugging Face
3
+ Provides:
4
+ • /vector-api/search → JSON API (with Basic Auth)
5
+ • / → Gradio interface
6
+ """
7
+
8
+ import os, chromadb, gradio as gr
9
+ from fastapi import FastAPI, Request, Depends, HTTPException, status
10
+ from fastapi.responses import JSONResponse
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+ from fastapi.security import HTTPBasic, HTTPBasicCredentials
13
+ from contextlib import asynccontextmanager
14
+
15
+ # ---------- CONFIG ----------
16
+ os.environ["CHROMA_TELEMETRY"] = "FALSE"
17
+ CHROMA_PATH = "/app/textbook_db"
18
+ COLLECTION_NAME = "textbooks"
19
+ USERNAME, PASSWORD = "bodha", "securepass123"
20
+ TOP_K = 5
21
+ # ----------------------------
22
+
23
+ # ---------- INIT ----------
24
+ client = chromadb.PersistentClient(path=CHROMA_PATH)
25
+ collection = client.get_or_create_collection(COLLECTION_NAME)
26
+ security = HTTPBasic()
27
+
28
+ @asynccontextmanager
29
+ async def lifespan(app: FastAPI):
30
+ print("✅ Starting Bodha Vector API...")
31
+ print(f"📚 Collection: {COLLECTION_NAME} | Docs: {collection.count()}")
32
+ yield
33
+ print("🛑 Stopping Bodha API...")
34
+
35
+ app = FastAPI(lifespan=lifespan)
36
+ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
37
+
38
+ # ---------- AUTH ----------
39
+ def verify(credentials: HTTPBasicCredentials = Depends(security)):
40
+ if credentials.username != USERNAME or credentials.password != PASSWORD:
41
+ raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED,
42
+ detail="Invalid credentials",
43
+ headers={"WWW-Authenticate": "Basic"})
44
+ return credentials.username
45
+
46
+ # ---------- API ----------
47
+ @app.post("/vector-api/search")
48
+ async def search_vector(request: Request, user: str = Depends(verify)):
49
+ body = await request.json()
50
+ query = body.get("query", "").strip()
51
+ if not query:
52
+ return JSONResponse({"error": "Missing query"}, status_code=400)
53
+
54
+ res = collection.query(query_texts=[query], n_results=TOP_K)
55
+ docs, metas, dists = res["documents"][0], res["metadatas"][0], res["distances"][0]
56
+ out = []
57
+ for i, doc in enumerate(docs):
58
+ meta = metas[i]
59
+ score = 1 - dists[i] if dists else None
60
+ out.append({
61
+ "rank": i + 1,
62
+ "score": round(score, 4) if score else None,
63
+ "class": meta.get("class", ""),
64
+ "chapter": meta.get("chapter", ""),
65
+ "pages": meta.get("pages", ""),
66
+ "source": meta.get("source_pdf", ""),
67
+ "text": doc
68
+ })
69
+ return JSONResponse(out)
70
+
71
+ # ---------- GRADIO ----------
72
+ def gradio_query(q):
73
+ if not q.strip():
74
+ return "⚠️ Enter a query."
75
+ res = collection.query(query_texts=[q], n_results=TOP_K)
76
+ docs, metas, dists = res["documents"][0], res["metadatas"][0], res["distances"][0]
77
+ out = ""
78
+ for i, doc in enumerate(docs):
79
+ meta = metas[i]
80
+ score = 1 - dists[i] if dists else None
81
+ out += f"### {i+1}. {meta.get('class','')} ({meta.get('source_pdf','')}) — score {round(score,3)}\n{doc[:700]}...\n\n---\n"
82
+ return out or "No results."
83
+
84
+ demo = gr.Interface(
85
+ fn=gradio_query,
86
+ inputs=gr.Textbox(label="Ask a Physics/Science question"),
87
+ outputs="markdown",
88
+ title="🔍 Bodha Textbook Search",
89
+ description="Search across NCERT (Class 6–12) + H.C.Verma"
90
+ )
91
+
92
+ app = gr.mount_gradio_app(app, demo, path="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
launch.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Hugging Face starts this script; it launches FastAPI via uvicorn.
3
+ echo "🚀 Launching Bodha Vector API on port 7860..."
4
+ exec uvicorn app:app --host 0.0.0.0 --port 7860
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  chromadb==0.5.3
2
  gradio==4.29.0
3
  fastapi==0.111.0
4
- uvicorn==0.30.0
 
1
  chromadb==0.5.3
2
  gradio==4.29.0
3
  fastapi==0.111.0
4
+ uvicorn==0.30.0