junaid17 committed on
Commit
db1e5a4
·
verified ·
1 Parent(s): d22fd1b

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +27 -0
  2. RAG.py +310 -0
  3. app.py +119 -0
  4. data_ingestion.py +67 -0
  5. requirements.txt +12 -0
  6. utils.py +55 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # Prevent Python from writing pyc files
4
+ ENV PYTHONDONTWRITEBYTECODE=1
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ WORKDIR /app
8
+
9
+ # Install system dependencies (required for sklearn / xgboost)
10
+ RUN apt-get update && apt-get install -y \
11
+ build-essential \
12
+ gcc \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Copy and install dependencies first (better caching)
16
+ COPY requirements.txt .
17
+ RUN pip install --no-cache-dir --upgrade pip \
18
+ && pip install --no-cache-dir -r requirements.txt
19
+
20
+ # Copy application code
21
+ COPY . .
22
+
23
+ # Hugging Face expects port 7860
24
+ EXPOSE 7860
25
+
26
+ # Start FastAPI
27
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
RAG.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, START, END
2
+ from typing import TypedDict, Annotated
3
+ from langchain_groq import ChatGroq
4
+ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
5
+ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
6
+ from langgraph.graph.message import add_messages
7
+ from langchain_core.tools import tool
8
+ from dotenv import load_dotenv
9
+ from langgraph.checkpoint.memory import MemorySaver
10
+ import os
11
+ from langchain_community.vectorstores import FAISS
12
+ from langchain_community.tools.tavily_search import TavilySearchResults
13
+
14
+ load_dotenv()
15
+
16
+
17
+ #===========================================
18
+ # Load FAISS DB & Reload Logic [FEATURE ADDED]
19
+ #===========================================
20
+
21
+ FAISS_DB_PATH = "vectorstore/db_faiss"
22
+ embeddings = OpenAIEmbeddings(model='text-embedding-3-small')
23
+
24
+ # Global variable for the database
25
+ db = None
26
+
27
+ def reload_vector_store():
28
+ """
29
+ Reloads the FAISS index from disk.
30
+ Call this function after a new file is ingested.
31
+ """
32
+ global db
33
+ if os.path.exists(FAISS_DB_PATH):
34
+ print(f"Loading FAISS from {FAISS_DB_PATH}...")
35
+ try:
36
+ db = FAISS.load_local(
37
+ FAISS_DB_PATH,
38
+ embeddings,
39
+ allow_dangerous_deserialization=True
40
+ )
41
+ print("Vector store loaded successfully.")
42
+ except Exception as e:
43
+ print(f"Error loading vector store: {e}")
44
+ db = None
45
+ else:
46
+ print("Warning: No Vector DB found. Please run ingestion first.")
47
+ db = None
48
+
49
+ # Initial Load
50
+ reload_vector_store()
51
+
52
+
53
+ #===========================================
54
+ # Class Schema
55
+ #===========================================
56
+
57
+ class Ragbot_State(TypedDict):
58
+ query : str
59
+ context : list[str]
60
+ metadata : list[dict]
61
+ RAG : bool
62
+ web_search : bool
63
+ model_name : str
64
+ web_context : str
65
+ response : Annotated[list[BaseMessage], add_messages]
66
+
67
+ #===========================================
68
+ # LLM'S
69
+ #===========================================
70
+
71
+
72
+ llm_kimi2 = ChatGroq(model='moonshotai/kimi-k2-instruct-0905', streaming=True, temperature=0.4)
73
+ llm_gpt = ChatOpenAI(model='gpt-4.1-nano', streaming=True, temperature=0.2)
74
+ llm_gpt_oss = ChatGroq(model='openai/gpt-oss-120b', streaming=True, temperature=0.3)
75
+ llm_lamma4 = ChatGroq(model='meta-llama/llama-4-scout-17b-16e-instruct', streaming=True, temperature=0.5)
76
+ llm_qwen3 = ChatGroq(model='qwen/qwen3-32b', streaming=True, temperature=0.5)
77
+
78
+ def get_llm(model_name: str):
79
+ if model_name == "kimi2":
80
+ return llm_kimi2
81
+ elif model_name == "gpt":
82
+ return llm_gpt
83
+ elif model_name == "gpt_oss":
84
+ return llm_gpt_oss
85
+ elif model_name == "lamma4":
86
+ return llm_lamma4
87
+ elif model_name == "qwen3":
88
+ return llm_qwen3
89
+ else:
90
+ return llm_gpt # fallback if no match
91
+
92
+ #===========================================
93
+ # Search tool
94
+ #===========================================
95
+
96
+ @tool
97
+ def tavily_search(query: str) -> dict:
98
+ """
99
+ Perform a real-time web search using Tavily.
100
+ """
101
+ try:
102
+ search = TavilySearchResults(max_results=2)
103
+ results = search.run(query)
104
+ return {"query": query, "results": results}
105
+ except Exception as e:
106
+ return {"error": str(e)}
107
+
108
+ #===========================================
109
+ # fetching web context
110
+ #===========================================
111
+
112
+ def fetch_web_context(state: Ragbot_State):
113
+ user_query = state["query"]
114
+
115
+ enriched_query = f"""
116
+ Fetch the latest, accurate, and up-to-date information about:
117
+ {user_query}
118
+
119
+ Focus on:
120
+ - recent news
121
+ - official announcements
122
+ - verified sources
123
+ - factual data
124
+ """
125
+
126
+ web_result = tavily_search.run(enriched_query)
127
+
128
+ return {
129
+ "web_context": str(web_result)
130
+ }
131
+
132
+ #===========================================
133
+ # db search
134
+ #===========================================
135
+
136
+ @tool
137
+ def faiss_search(query: str) -> str:
138
+ """Search the FAISS vectorstore and return relevant documents."""
139
+ # Check global db variable
140
+ if db is None:
141
+ return "No documents have been uploaded yet.", []
142
+
143
+ try:
144
+ results = db.similarity_search(query, k=3)
145
+ context = "\n\n".join([doc.page_content for doc in results])
146
+ metadata = [doc.metadata for doc in results]
147
+ return context, metadata
148
+ except Exception as e:
149
+ return f"Error searching vector store: {str(e)}", []
150
+
151
+ #===========================================
152
+ # router
153
+ #===========================================
154
+
155
+
156
+ def router(state: Ragbot_State):
157
+ if state["RAG"]:
158
+ return "fetch_context"
159
+
160
+ if state["web_search"]:
161
+ return "fetch_web_context"
162
+
163
+ return "chat"
164
+
165
+ #===========================================
166
+ # fetching context
167
+ #===========================================
168
+
169
+ def fetch_context(state: Ragbot_State):
170
+ query = state["query"]
171
+ context, metadata = faiss_search.invoke({"query": query})
172
+ return {"context": [context], "metadata": [metadata]}
173
+
174
+
175
+ #===========================================
176
+ # system prompt
177
+ #===========================================
178
+
179
+
180
+ SYSTEM_PROMPT = SystemMessage(
181
+ content="""
182
+ You are an intelligent conversational assistant and retrieval-augmented AI system built by Junaid.
183
+
184
+ Your role is to:
185
+ - Engage naturally in conversation like a friendly, helpful chatbot.
186
+ - Answer general questions using your own knowledge when no external context is provided.
187
+ - When relevant context is provided, use it accurately to answer user questions.
188
+ - Seamlessly switch between casual conversation and knowledge-based answering.
189
+
190
+ Guidelines:
191
+ - If context is provided and relevant, use it as the primary source of truth.
192
+ - If context is not provided or not relevant, respond using your general knowledge.
193
+ - Do not hallucinate or invent information.
194
+ - If you are unsure or the information is not available, clearly state that.
195
+ - Be clear, concise, and helpful in all responses.
196
+ - Maintain a natural, human-like conversational tone.
197
+ - Never mention internal implementation details such as embeddings, vector databases, or system architecture.
198
+
199
+ You are designed to provide reliable, accurate, and engaging assistance.
200
+ """
201
+ )
202
+
203
+ #===========================================
204
+ # Chat function
205
+ #===========================================
206
+
207
+ def chat(state:Ragbot_State):
208
+ query = state['query']
209
+ context = state['context']
210
+ metadata = state['metadata']
211
+ web_context = state['web_context']
212
+ model_name = state.get('model_name', 'gpt')
213
+
214
+ history = state.get("response", [])
215
+
216
+ # [CHANGED] Updated Prompt to include History so it remembers your name
217
+ prompt = f"""
218
+ You are an expert assistant designed to answer user questions using multiple information sources.
219
+
220
+ Source Priority Rules (STRICT):
221
+ 1. **Conversation History**: Check if the answer was provided in previous messages (e.g., user's name, previous topics).
222
+ 2. If the provided Context contains the answer, use ONLY the Context.
223
+ 3. If the Context does not contain the answer and Web Context is available, use the Web Context.
224
+ 4. If neither Context nor Web Context contains the answer, use your general knowledge.
225
+ 5. Do NOT invent or hallucinate facts.
226
+ 6. If the answer cannot be determined, clearly say so.
227
+
228
+ User Question:
229
+ {query}
230
+
231
+ Retrieved Context (Vector Database):
232
+ {context}
233
+
234
+ Metadata:
235
+ {metadata}
236
+
237
+ Web Context (Real-time Search):
238
+ {web_context}
239
+
240
+ Final Answer:
241
+ """
242
+
243
+ selected_llm = get_llm(model_name)
244
+ messages = [SYSTEM_PROMPT] + history + [HumanMessage(content=prompt)]
245
+ response = selected_llm.invoke(messages)
246
+ return {
247
+ 'response': [
248
+ HumanMessage(content=query),
249
+ response
250
+ ]
251
+ }
252
+
253
+ #===========================================
254
+ # Graph Declaration
255
+ #===========================================
256
+
257
+ # Keeping MemorySaver as requested (Note: RAM only, wipes on restart)
258
+ memory = MemorySaver()
259
+ graph = StateGraph(Ragbot_State)
260
+
261
+ graph.add_node("fetch_context", fetch_context)
262
+ graph.add_node("fetch_web_context", fetch_web_context)
263
+ graph.add_node("chat", chat)
264
+
265
+ graph.add_conditional_edges(
266
+ START,
267
+ router,
268
+ {
269
+ "fetch_context": "fetch_context",
270
+ "fetch_web_context": "fetch_web_context",
271
+ "chat": "chat"
272
+ }
273
+ )
274
+
275
+ graph.add_edge("fetch_context", "chat")
276
+ graph.add_edge("fetch_web_context", "chat")
277
+ graph.add_edge("chat", END)
278
+
279
+ app = graph.compile(checkpointer=memory)
280
+
281
+
282
+ #===========================================
283
+ # Helper Function
284
+ #===========================================
285
+
286
+ def ask_bot(query: str, use_rag: bool = False, use_web: bool = False, thread_id: str = "1"):
287
+ config = {"configurable": {"thread_id": thread_id}}
288
+ inputs = {
289
+ "query": query,
290
+ "RAG": use_rag,
291
+ "web_search": use_web,
292
+ "context": [],
293
+ "metadata": [],
294
+ "web_context": "",
295
+ }
296
+
297
+ result = app.invoke(inputs, config=config)
298
+ last_message = result['response'][-1]
299
+
300
+ return last_message.content
301
+
302
+
303
+ """print("--- Conversation 1 ---")
304
+ # User says hello and gives name
305
+ response = ask_bot("Hi, my name is Junaid", thread_id="session_A")
306
+ print(f"Bot: {response}")
307
+
308
+ # User asks for name (RAG and Web are OFF)
309
+ response = ask_bot("What is my name?", thread_id="session_A")
310
+ print(f"Bot: {response}")"""
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from fastapi.responses import FileResponse
4
+ import asyncio
5
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException, BackgroundTasks
6
+ from fastapi.responses import StreamingResponse
7
+ from pydantic import BaseModel
8
+ from utils import STT, TTS
9
+ from data_ingestion import Ingest_Data
10
+ from RAG import app as rag_app, Ragbot_State, reload_vector_store
11
+
12
+ # Initialize FastAPI
13
+ app = FastAPI(title="LangGraph RAG Chatbot", version="1.0")
14
+
15
+ # --- Pydantic Models ---
16
+ class ChatRequest(BaseModel):
17
+ query: str
18
+ thread_id: str = "default_user"
19
+ use_rag: bool = False
20
+ use_web: bool = False
21
+ model_name: str = "gpt"
22
+
23
+ class TTSRequest(BaseModel):
24
+ text: str
25
+ voice: str = "en-US-AriaNeural"
26
+
27
+
28
+ # --- Endpoints ---
29
+
30
+ @app.get("/")
31
+ def health_check():
32
+ return {"status": "running", "message": "Bot is ready"}
33
+
34
+ @app.post("/upload")
35
+ async def upload_document(
36
+ file: UploadFile = File(...),
37
+ background_tasks: BackgroundTasks = BackgroundTasks()
38
+ ):
39
+ try:
40
+ temp_filename = f"temp_{file.filename}"
41
+
42
+ with open(temp_filename, "wb") as buffer:
43
+ shutil.copyfileobj(file.file, buffer)
44
+
45
+ def process_and_reload(path):
46
+ try:
47
+ result = Ingest_Data(path)
48
+ print(f"Ingestion Result: {result}")
49
+ reload_vector_store()
50
+
51
+ except Exception as e:
52
+ print(f"Error processing background task: {e}")
53
+ finally:
54
+ if os.path.exists(path):
55
+ os.remove(path)
56
+
57
+ background_tasks.add_task(process_and_reload, temp_filename)
58
+
59
+ return {
60
+ "message": "File received. Processing started in background.",
61
+ "filename": file.filename
62
+ }
63
+
64
+ except Exception as e:
65
+ raise HTTPException(status_code=500, detail=str(e))
66
+
67
+
68
+ @app.post("/chat")
69
+ async def chat_endpoint(request: ChatRequest):
70
+ config = {"configurable": {"thread_id": request.thread_id}}
71
+
72
+ inputs = {
73
+ "query": request.query,
74
+ "RAG": request.use_rag,
75
+ "web_search": request.use_web,
76
+ "model_name": request.model_name,
77
+ "context": [],
78
+ "metadata": [],
79
+ "web_context": "",
80
+ }
81
+
82
+ async def event_generator():
83
+ async for event in rag_app.astream_events(inputs, config=config, version="v1"):
84
+ kind = event["event"]
85
+ if kind == "on_chat_model_stream":
86
+ content = event["data"]["chunk"].content
87
+
88
+ if content:
89
+ data = content.replace("\n", "\\n")
90
+ yield f"data: {data}\n\n"
91
+
92
+ return StreamingResponse(
93
+ event_generator(),
94
+ media_type="text/event-stream",
95
+ headers={
96
+ "Cache-Control": "no-cache",
97
+ "Connection": "keep-alive",
98
+ "X-Accel-Buffering": "no",
99
+ },
100
+ )
101
+
102
+
103
+ # ---------------- STT ---------------- #
104
+ @app.post("/stt")
105
+ async def transcribe_audio(file: UploadFile = File(...)):
106
+ try:
107
+ return await STT(file)
108
+ except Exception as e:
109
+ raise HTTPException(status_code=500, detail=str(e))
110
+
111
+ # ---------------- TTS ---------------- #
112
+ @app.post("/tts")
113
+ async def text_to_speech(req: TTSRequest):
114
+ try:
115
+ audio_path = await TTS(req.text, req.voice)
116
+ return FileResponse(audio_path, media_type="audio/mpeg", filename="output.mp3")
117
+
118
+ except Exception as e:
119
+ raise HTTPException(status_code=500, detail=str(e))
data_ingestion.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_openai import OpenAIEmbeddings
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv()
10
+
11
+ # 1. Setup Logging (Better than print for Servers)
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ embeddings = OpenAIEmbeddings(model='text-embedding-3-small')
16
+
17
+ # 2. Add arguments for flexible paths
18
+ def Ingest_Data(pdf_path: str, vector_db_path: str = "vectorstore/db_faiss"):
19
+ """
20
+ Ingests a PDF, splits it, and saves the vector store.
21
+ Returns a dict with status to send back to the Frontend.
22
+ """
23
+ try:
24
+ logger.info(f"Starting ingestion for: {pdf_path}")
25
+
26
+ # Validation: Check if file exists
27
+ if not os.path.exists(pdf_path):
28
+ raise FileNotFoundError(f"The file {pdf_path} was not found.")
29
+
30
+ # Load
31
+ loader = PyPDFLoader(pdf_path)
32
+ pages = loader.load_and_split()
33
+
34
+ if not pages:
35
+ return {"status": "error", "message": "PDF contains no text."}
36
+
37
+ # Split
38
+ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=250)
39
+ docs = splitter.split_documents(pages)
40
+ logger.info(f"Processing {len(docs)} chunks...")
41
+
42
+ # Embed & Save
43
+ # Note: This is CPU/Network intensive. In FastAPI,
44
+ # ensure you run this in a BackgroundTask or ThreadPool.
45
+ db = FAISS.from_documents(docs, embeddings)
46
+ db.save_local(vector_db_path)
47
+
48
+ logger.info(f"Saved vectorstore to {vector_db_path}")
49
+
50
+ # 3. Return JSON-friendly data
51
+ return {
52
+ "status": "success",
53
+ "chunks_processed": len(docs),
54
+ "db_path": vector_db_path,
55
+ "message": "File successfully ingested and indexed."
56
+ }
57
+
58
+ except Exception as e:
59
+ logger.error(f"Ingestion failed: {str(e)}")
60
+ return {
61
+ "status": "failed",
62
+ "error": str(e)
63
+ }
64
+
65
+
66
+
67
+ #Ingest_Data("MLBOOK.pdf")
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ python-dotenv
5
+ langchain-groq
6
+ langchain-openai
7
+ langchain-community
8
+ langchain-huggingface
9
+ langgraph
10
+ faiss-cpu
11
+ edge-tts
12
+ groq
utils.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from uuid import uuid4
3
+ import edge_tts
4
+ from groq import Groq
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
+ client = Groq()
10
+
11
+ # ==================================================
12
+ # 🎧 SPEECH TO TEXT
13
+ # ==================================================
14
+
15
+ async def STT(audio_file):
16
+ os.makedirs("uploads", exist_ok=True)
17
+ file_path = f"uploads/{uuid4().hex}.wav"
18
+
19
+ with open(file_path, "wb") as f:
20
+ f.write(await audio_file.read())
21
+
22
+ with open(file_path, "rb") as f:
23
+ transcription = client.audio.transcriptions.create(
24
+ file=f,
25
+ model="whisper-large-v3-turbo",
26
+ response_format="verbose_json",
27
+ temperature=0.0
28
+ )
29
+
30
+ # Optional: cleanup the uploaded file after processing
31
+ # os.remove(file_path)
32
+
33
+ return {
34
+ "text": transcription.text,
35
+ "segments": transcription.segments,
36
+ "language": transcription.language
37
+ }
38
+
39
+
40
+ # ==================================================
41
+ # 🗣️ TEXT TO SPEECH
42
+ # ==================================================
43
+
44
+ async def TTS(text: str, voice: str = "en-US-AriaNeural") -> str:
45
+ """
46
+ Converts text to speech and saves it to a file.
47
+ Returns the path to the generated audio file.
48
+ """
49
+ os.makedirs("outputs", exist_ok=True)
50
+ filename = f"outputs/{uuid4().hex}.mp3"
51
+
52
+ communicate = edge_tts.Communicate(text, voice)
53
+ await communicate.save(filename)
54
+
55
+ return filename