newtechdevng committed on
Commit
e483b5a
·
verified ·
1 Parent(s): c89f044

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +22 -0
  2. app (1).py +132 -0
  3. requirements (1).txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies.
# build-essential and cmake are presumably needed to compile llama-cpp-python
# from source during `pip install` — confirm before removing any of them.
# --no-install-recommends keeps optional packages out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies (as root, into the system site-packages, so the
# layer is cached independently of the application code).
# NOTE(review): this commit uploads the files as "requirements (1).txt" and
# "app (1).py"; the COPY instructions below expect "requirements.txt" and
# "app.py" — make sure files with those exact names exist in the repo.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Hugging Face Spaces runs the container as UID 1000. The app downloads the
# model into the Hugging Face cache under $HOME at start-up, so that user needs
# a home directory it can write to.
RUN useradd -m -u 1000 user
ENV HOME=/home/user

# Copy app
COPY --chown=user app.py .

USER user

# HF Spaces requires port 7860
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app (1).py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.responses import StreamingResponse
3
+ from pydantic import BaseModel
4
+ from typing import Optional
5
+ from llama_cpp import Llama
6
+ import os
7
+
8
# ── Model loading ──────────────────────────────────────────────────────────────
# Repo and file of the GGUF model to serve. Both can be overridden with an
# environment variable of the same name, so a different model can be deployed
# without editing this file; unset or empty variables fall back to the defaults.
MODEL_REPO = (
    os.environ.get("MODEL_REPO")
    or "dipangshu22/Ambuj-Tripathi-Indian-Legal-Llama-GGUF"
)
MODEL_FILE = os.environ.get("MODEL_FILE") or "llama-3.2-1b-instruct.Q4_K_M.gguf"

# Sent as the first message of every conversation by the /chat and /ask routes.
SYSTEM_PROMPT = (
    "You are Ambuj, an expert AI assistant specialised in Indian law. "
    "You provide accurate, well-structured legal information based on Indian statutes, "
    "case law, and legal procedures. Always clarify that your responses are for "
    "informational purposes only and not a substitute for professional legal advice."
)

# The model is loaded once, at import time, and shared by every request handler.
print("Loading model …")
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_ctx=4096,
    # os.cpu_count() may return None, hence the fallback to 4 threads.
    n_threads=os.cpu_count() or 4,
    verbose=False,
)
print("Model ready ✓")

# ── FastAPI app ────────────────────────────────────────────────────────────────
app = FastAPI(
    title="Indian Legal AI API",
    description="API for the Ambuj-Tripathi Indian Legal Llama model",
    version="1.0.0",
)
34
+
35
+
36
+ # ── Request / Response schemas ─────────────────────────────────────────────────
37
# One turn of a conversation. Documented with comments rather than a docstring
# so the generated schema stays exactly as it was.
class Message(BaseModel):
    # Who produced the turn: "user" | "assistant" | "system".
    role: str
    # The text of the turn.
    content: str
40
+
41
+
42
# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    # Conversation so far; the handler puts the system prompt in front of it.
    messages: list[Message]

    # Generation settings, forwarded to the model's chat-completion call.
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.7

    # When true the reply is streamed back as plain text instead of JSON.
    stream: Optional[bool] = False
47
+
48
+
49
# Non-streaming reply returned by POST /chat.
class ChatResponse(BaseModel):
    # Always "assistant" unless the caller of the constructor overrides it.
    role: str = "assistant"
    # Text generated by the model.
    content: str
52
+
53
+
54
+ # ── Routes ─────────────────────────────────────────────────────────────────────
55
# Landing route: reports the service name, the model file being served and a
# short description of each endpoint.
@app.get("/")
def root():
    endpoint_help = {
        "POST /chat": "Send messages, get a response",
        "POST /ask": "Simple single-question shortcut",
        "GET /health": "Health check",
        "GET /docs": "Swagger UI",
    }
    return {
        "name": "Indian Legal AI API",
        "model": MODEL_FILE,
        "endpoints": endpoint_help,
    }
67
+
68
+
69
# Health check: always answers "ok" and reports whether the module-level model
# object is set.
@app.get("/health")
def health():
    model_loaded = llm is not None
    return {"status": "ok", "model_loaded": model_loaded}
72
+
73
+
74
# Behaviour notes (kept out of the docstring, which FastAPI publishes as the
# endpoint description):
#   * SYSTEM_PROMPT is always sent first, followed by the caller's messages.
#   * A message whose role is not user/assistant/system is rejected with 400.
#   * stream=true  -> plain-text StreamingResponse of the generated fragments.
#   * otherwise    -> ChatResponse carrying the complete reply.
@app.post("/chat")
def chat(request: ChatRequest):
    """
    Full chat endpoint — pass a list of messages with roles.
    Optionally stream the response.
    """
    allowed_roles = ("user", "assistant", "system")

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for m in request.messages:
        if m.role not in allowed_roles:
            raise HTTPException(status_code=400, detail=f"Invalid role: {m.role}")
        messages.append({"role": m.role, "content": m.content})

    # The schema accepts an explicit JSON null for temperature, but the model
    # call expects a float, so fall back to the schema default instead of
    # forwarding None.
    temperature = 0.7 if request.temperature is None else request.temperature

    # Arguments shared by the streaming and non-streaming calls.
    completion_kwargs = {
        "messages": messages,
        "max_tokens": request.max_tokens,
        "temperature": temperature,
    }

    if request.stream:
        def generate():
            for chunk in llm.create_chat_completion(stream=True, **completion_kwargs):
                # Some chunks carry no text (missing or empty "content"); skip them.
                delta = chunk["choices"][0]["delta"].get("content", "")
                if delta:
                    yield delta

        return StreamingResponse(generate(), media_type="text/plain")

    response = llm.create_chat_completion(stream=False, **completion_kwargs)
    content = response["choices"][0]["message"]["content"]
    return ChatResponse(content=content)
108
+
109
+
110
# Request body accepted by POST /ask.
class AskRequest(BaseModel):
    # The single question to put to the model.
    question: str

    # Generation settings, forwarded to the model's chat-completion call.
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.7
114
+
115
+
116
# Behaviour notes (kept out of the docstring, which FastAPI publishes as the
# endpoint description): the question is sent as one user message after
# SYSTEM_PROMPT, and the reply is returned together with the original question.
@app.post("/ask")
def ask(request: AskRequest):
    """
    Simple single-question shortcut — no need to format messages manually.
    """
    # The schema accepts an explicit JSON null for temperature, but the model
    # call expects a float, so fall back to the schema default instead of
    # forwarding None.
    temperature = 0.7 if request.temperature is None else request.temperature

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": request.question},
    ]
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=request.max_tokens,
        temperature=temperature,
        stream=False,
    )
    content = response["choices"][0]["message"]["content"]
    return {"question": request.question, "answer": content}
requirements (1).txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
llama-cpp-python==0.3.4
# Llama.from_pretrained() fetches the GGUF file through huggingface_hub, which
# llama-cpp-python does not install on its own.
huggingface-hub>=0.23.0
fastapi>=0.115.0
uvicorn>=0.30.0
pydantic>=2.0.0
# NOTE(review): not imported by the app code in this commit — confirm it is still needed.
PyMuPDF>=1.24.0