{ "cells": [ { "cell_type": "markdown", "id": "cf8f37b5", "metadata": {}, "source": [ "## 1๏ธโƒฃ Install Required Packages" ] }, { "cell_type": "code", "execution_count": null, "id": "35266b5d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "โœ… All packages installed!\n" ] } ], "source": [ "import sys\n", "import subprocess\n", "\n", "# Install packages (works in VS Code Jupyter)\n", "packages = [\n", " 'langchain-community',\n", " 'sentence-transformers',\n", " 'transformers',\n", " 'faiss-cpu',\n", " 'pypdf',\n", " 'google-generativeai',\n", " 'langchain-huggingface',\n", " 'langchain-text-splitters',\n", " 'fastapi',\n", " 'uvicorn',\n", " 'nest-asyncio',\n", " 'gradio',\n", " 'deep-translator'\n", "]\n", "\n", "print(\"๐Ÿ“ฆ Installing required packages...\")\n", "subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q'] + packages)\n", "print(\"โœ… All packages installed!\")" ] }, { "cell_type": "markdown", "id": "b09a84be", "metadata": {}, "source": [ "## 2๏ธโƒฃ Setup Local Directories (Windows)" ] }, { "cell_type": "code", "execution_count": 6, "id": "760088c8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "โœ… Local directories created!\n", "๐Ÿ“ RAG Data Location: /content/rag_data\n", "๐Ÿ“„ PDFs will be stored at: /content/rag_data/pdfs\n", "๐Ÿ—„๏ธ FAISS index at: /content/rag_data/faiss_index\n" ] } ], "source": [ "import os\n", "\n", "# Use local directories\n", "RAG_DIR = os.path.join(os.getcwd(), 'rag_data')\n", "FAISS_PATH = os.path.join(RAG_DIR, 'faiss_index')\n", "PDFS_PATH = os.path.join(RAG_DIR, 'pdfs')\n", "\n", "os.makedirs(FAISS_PATH, exist_ok=True)\n", "os.makedirs(PDFS_PATH, exist_ok=True)\n", "\n", "print(f\"โœ… Local directories created!\")\n", "print(f\"๐Ÿ“ RAG Data Location: {RAG_DIR}\")\n", "print(f\"๐Ÿ“„ PDFs will be stored at: {PDFS_PATH}\")\n", "print(f\"๐Ÿ—„๏ธ FAISS index at: {FAISS_PATH}\")" ] }, { "cell_type": "markdown", "id": "888d519c", 
"metadata": {}, "source": [ "## 3๏ธโƒฃ Configure Gemini API Key" ] }, { "cell_type": "code", "execution_count": 7, "id": "8902f9ef", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "โš ๏ธ WARNING: Please set your Gemini API key above!\n" ] } ], "source": [
"import os\n",
"\n",
"import google.generativeai as genai\n",
"\n",
"# Gemini API key: read from the GOOGLE_API_KEY environment variable so the secret\n",
"# never has to be typed into (and saved or committed with) this notebook.\n",
"# Get it from: https://makersuite.google.com/app/apikey\n",
"# The placeholder below is only a sentinel meaning \"no key configured\".\n",
"GOOGLE_API_KEY = os.environ.get(\"GOOGLE_API_KEY\", \"YOUR_GEMINI_API_KEY_HERE\")\n",
"\n",
"if GOOGLE_API_KEY == \"YOUR_GEMINI_API_KEY_HERE\":\n",
"    print(\"โš ๏ธ WARNING: Please set your Gemini API key above!\")\n",
"else:\n",
"    genai.configure(api_key=GOOGLE_API_KEY)\n",
"    print(\"โœ… Gemini API configured!\")" ] }, { "cell_type": "markdown", "id": "5b250359", "metadata": {}, "source": [ "## 4๏ธโƒฃ RAG System Functions" ] }, { "cell_type": "code", "execution_count": 8, "id": "d292e154", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:torchao.kernel.intmm:Warning: Detected no triton, on systems without Triton certain kernels will not work\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "๐Ÿ” Checking for existing RAG data...\n", "โ„น๏ธ No existing vector store found\n", "\n", "โœ… RAG System Ready!\n" ] } ], "source": [
"import unicodedata\n",
"import re\n",
"import shutil\n",
"from typing import List, Dict, Optional\n",
"from pathlib import Path\n",
"from langchain_community.document_loaders.pdf import PyPDFLoader\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import FAISS\n",
"from deep_translator import GoogleTranslator\n",
"\n",
"# Global variables\n",
"vectordb = None\n",
"retriever = None\n",
"embeddings = None\n",
"rag_initialized = False\n",
"uploaded_documents = []\n",
"\n",
"\n",
"def initialize_embeddings():\n",
"    \"\"\"Initialize multilingual embedding 
model (supports English & Sinhala)\"\"\"\n",
"    global embeddings\n",
"    \n",
"    if embeddings is not None:\n",
"        return embeddings\n",
"    \n",
"    print(\"๐Ÿ“ฅ Loading multilingual embedding model...\")\n",
"    embeddings = HuggingFaceEmbeddings(\n",
"        model_name=\"sentence-transformers/paraphrase-multilingual-mpnet-base-v2\"\n",
"    )\n",
"    print(\"โœ… Embedding model loaded!\")\n",
"    return embeddings\n",
"\n",
"\n",
"def clean_text(text: str) -> str:\n",
"    \"\"\"Clean and normalize text for embedding.\n",
"\n",
"    Applies NFKC normalization, turns every run of whitespace into a single space,\n",
"    then drops characters in the Unicode categories So, Cn, Cc, Cf and Cs.\n",
"\n",
"    Args:\n",
"        text: Raw text of one chunk.\n",
"\n",
"    Returns:\n",
"        The cleaned text, or \"\" when the input is not a string or is blank.\n",
"    \"\"\"\n",
"    if not isinstance(text, str) or not text.strip():\n",
"        return \"\"\n",
"    \n",
"    normalized_text = unicodedata.normalize('NFKC', text)\n",
"    # Collapse whitespace BEFORE filtering: newlines and tabs are category 'Cc', so\n",
"    # filtering first would delete them and glue together words from adjacent lines.\n",
"    normalized_text = re.sub(r'\\s+', ' ', normalized_text)\n",
"    cleaned_chars = [\n",
"        char for char in normalized_text\n",
"        if unicodedata.category(char) not in ['So', 'Cn', 'Cc', 'Cf', 'Cs']\n",
"    ]\n",
"    cleaned_text = \"\".join(cleaned_chars)\n",
"    # Removing characters can leave two spaces side by side; collapse once more.\n",
"    cleaned_text = re.sub(r'\\s+', ' ', cleaned_text).strip()\n",
"    return cleaned_text\n",
"\n",
"\n",
"def load_and_process_pdf(pdf_path: str) -> List:\n",
"    \"\"\"Load PDF and split into chunks\"\"\"\n",
"    print(f\"๐Ÿ“„ Loading PDF: {Path(pdf_path).name}\")\n",
"    \n",
"    loader = PyPDFLoader(pdf_path)\n",
"    docs = loader.load()\n",
"    \n",
"    splitter = RecursiveCharacterTextSplitter(\n",
"        chunk_size=300,\n",
"        chunk_overlap=80\n",
"    )\n",
"    chunks = splitter.split_documents(docs)\n",
"    \n",
"    print(f\" โœ… {len(docs)} pages โ†’ {len(chunks)} chunks\")\n",
"    return chunks\n",
"\n",
"\n",
"def create_vector_store(chunks: List) -> bool:\n",
"    \"\"\"Create or update FAISS vector store\"\"\"\n",
"    global vectordb, retriever, rag_initialized\n",
"    \n",
"    initialize_embeddings()\n",
"    \n",
"    texts = [doc.page_content for doc in chunks]\n",
"    metadatas = [doc.metadata for doc in chunks]\n",
"    \n",
"    processed_texts = []\n",
"    processed_metadatas = []\n",
"    \n",
"    for i, text in enumerate(texts):\n",
"        cleaned_text = clean_text(text)\n",
"        if cleaned_text:\n",
"            processed_texts.append(cleaned_text)\n",
"            processed_metadatas.append(metadatas[i])\n",
" \n", " if not processed_texts:\n", " print(\"โš ๏ธ No valid texts after cleaning\")\n", " return False\n", " \n", " print(f\"๐Ÿ”„ Creating embeddings for {len(processed_texts)} chunks...\")\n", " \n", " if vectordb is None:\n", " vectordb = FAISS.from_texts(processed_texts, embeddings, metadatas=processed_metadatas)\n", " else:\n", " new_vectordb = FAISS.from_texts(processed_texts, embeddings, metadatas=processed_metadatas)\n", " vectordb.merge_from(new_vectordb)\n", " \n", " retriever = vectordb.as_retriever(search_kwargs={\"k\": 4})\n", " rag_initialized = True\n", " \n", " save_vector_store()\n", " return True\n", "\n", "\n", "def save_vector_store():\n", " \"\"\"Save FAISS index to local storage\"\"\"\n", " if vectordb is None:\n", " return\n", " \n", " vectordb.save_local(FAISS_PATH)\n", " print(f\"๐Ÿ’พ Vector store saved locally\")\n", "\n", "\n", "def load_vector_store() -> bool:\n", " \"\"\"Load FAISS index from local storage\"\"\"\n", " global vectordb, retriever, rag_initialized, uploaded_documents\n", " \n", " index_file = os.path.join(FAISS_PATH, 'index.faiss')\n", " if not os.path.exists(index_file):\n", " print(\"โ„น๏ธ No existing vector store found\")\n", " return False\n", " \n", " try:\n", " initialize_embeddings()\n", " vectordb = FAISS.load_local(\n", " FAISS_PATH, \n", " embeddings,\n", " allow_dangerous_deserialization=True\n", " )\n", " retriever = vectordb.as_retriever(search_kwargs={\"k\": 4})\n", " rag_initialized = True\n", " \n", " # Load document list\n", " uploaded_documents = [f for f in os.listdir(PDFS_PATH) if f.endswith('.pdf')]\n", " \n", " print(f\"โœ… Loaded existing vector store\")\n", " print(f\"๐Ÿ“š {len(uploaded_documents)} documents found\")\n", " return True\n", " except Exception as e:\n", " print(f\"โš ๏ธ Failed to load vector store: {e}\")\n", " return False\n", "\n", "\n", "def translate_to_english(text: str) -> str:\n", " \"\"\"Translate any language to English\"\"\"\n", " try:\n", " translator = 
GoogleTranslator(source='auto', target='en')\n",
"        translated = translator.translate(text)\n",
"        # Guard against an empty/None result so callers always get a usable string.\n",
"        return translated or text\n",
"    except Exception as e:\n",
"        # Best-effort by design: fall back to the untranslated text, but report why\n",
"        # instead of hiding it (and without trapping KeyboardInterrupt/SystemExit).\n",
"        print(f\"Translation failed, using original text: {e}\")\n",
"        return text  # Return original if translation fails\n",
"\n",
"\n",
"def rag_answer(question: str, relevance_threshold: float = 2.0, translate: bool = True) -> Dict:\n",
"    \"\"\"Answer question using RAG - check database first, fallback to Gemini\"\"\"\n",
"    global retriever, vectordb\n",
"    \n",
"    # Translate to English if needed\n",
"    original_question = question\n",
"    if translate:\n",
"        question = translate_to_english(question)\n",
"    \n",
"    result = {\n",
"        \"question\": original_question,\n",
"        \"question_english\": question,\n",
"        \"answer\": \"\",\n",
"        \"source\": \"none\",\n",
"        \"context_found\": False,\n",
"        \"relevance_score\": 0.0\n",
"    }\n",
"    \n",
"    if not rag_initialized or retriever is None:\n",
"        print(\"โš ๏ธ RAG not initialized, using Gemini\")\n",
"        result[\"source\"] = \"gemini\"\n",
"        result[\"answer\"] = ask_gemini_directly(question)\n",
"        return result\n",
"    \n",
"    # Search vector database\n",
"    docs_with_scores = vectordb.similarity_search_with_score(question, k=4)\n",
"    \n",
"    if not docs_with_scores:\n",
"        print(\"โš ๏ธ No documents found, using Gemini\")\n",
"        result[\"source\"] = \"gemini\"\n",
"        result[\"answer\"] = ask_gemini_directly(question)\n",
"        return result\n",
"    \n",
"    best_score = docs_with_scores[0][1]\n",
"    result[\"relevance_score\"] = float(best_score)\n",
"    \n",
"    # Check relevance threshold\n",
"    if best_score > relevance_threshold:\n",
"        print(f\"โš ๏ธ Low relevance (score: {best_score:.3f}), using Gemini\")\n",
"        result[\"source\"] = \"gemini\"\n",
"        result[\"answer\"] = ask_gemini_directly(question)\n",
"        return result\n",
"    \n",
"    # Good relevance - use RAG\n",
"    print(f\"โœ… Good relevance (score: {best_score:.3f}), answering from documents\")\n",
"    docs = [doc for doc, score in docs_with_scores]\n",
"    context = \"\\n\\n\".join([d.page_content for d in docs])\n",
"    result[\"context_found\"] = True\n",
"    \n",
"    
prompt = f\"\"\"Answer the question based on the following context from PDF documents. If the context doesn't contain enough information, say \"I don't have enough information in the documents.\"\n",
"\n",
"Context:\n",
"{context}\n",
"\n",
"Question: {question}\n",
"\n",
"Answer:\"\"\"\n",
"    \n",
"    try:\n",
"        model = genai.GenerativeModel(\"models/gemini-1.5-flash\")\n",
"        response = model.generate_content(prompt)\n",
"        result[\"answer\"] = response.text\n",
"        result[\"source\"] = \"rag\"\n",
"    except Exception as e:\n",
"        print(f\"โŒ RAG generation error: {e}\")\n",
"        result[\"answer\"] = f\"Error: {str(e)}\"\n",
"        result[\"source\"] = \"error\"\n",
"    \n",
"    return result\n",
"\n",
"\n",
"def ask_gemini_directly(question: str) -> str:\n",
"    \"\"\"Fallback: Ask Gemini directly without RAG\"\"\"\n",
"    try:\n",
"        model = genai.GenerativeModel(\"models/gemini-1.5-flash\")\n",
"        response = model.generate_content(f\"Answer this question: {question}\")\n",
"        return response.text\n",
"    except Exception as e:\n",
"        return f\"Error: {str(e)}\"\n",
"\n",
"\n",
"def process_uploaded_pdf(file_path: str, original_filename: str) -> str:\n",
"    \"\"\"Process uploaded PDF from admin panel.\n",
"\n",
"    Copies the file into PDFS_PATH, splits it into chunks and adds them to the\n",
"    vector store.\n",
"\n",
"    Args:\n",
"        file_path: Path of the uploaded file on disk.\n",
"        original_filename: Name to store the file under; only its final path\n",
"            component is used.\n",
"\n",
"    Returns:\n",
"        A human-readable status message (success or failure); errors are reported\n",
"        in the message rather than raised.\n",
"    \"\"\"\n",
"    try:\n",
"        # The name may come straight from an HTTP client: keep only the last path\n",
"        # component so it cannot point outside PDFS_PATH (e.g. \"../../x.pdf\").\n",
"        original_filename = os.path.basename(original_filename.replace(\"\\\\\", \"/\"))\n",
"        if not original_filename:\n",
"            return \"โŒ Error: invalid filename\"\n",
"        \n",
"        # Copy to local storage\n",
"        dest_path = os.path.join(PDFS_PATH, original_filename)\n",
"        # shutil.copy raises SameFileError when source and destination are the same file.\n",
"        if os.path.abspath(file_path) != os.path.abspath(dest_path):\n",
"            shutil.copy(file_path, dest_path)\n",
"        \n",
"        # Process PDF\n",
"        chunks = load_and_process_pdf(dest_path)\n",
"        \n",
"        if not chunks:\n",
"            return f\"โŒ Failed to extract text from {original_filename}\"\n",
"        \n",
"        # Create/update vector store\n",
"        success = create_vector_store(chunks)\n",
"        \n",
"        if success:\n",
"            if original_filename not in uploaded_documents:\n",
"                uploaded_documents.append(original_filename)\n",
"            return f\"โœ… Successfully processed '{original_filename}'\\n ๐Ÿ“Š {len(chunks)} chunks created\\n ๐Ÿ“š Total documents: {len(uploaded_documents)}\"\n",
"        else:\n",
"            return f\"โŒ Failed to process {original_filename}\"\n",
"    \n",
"    except Exception as e:\n",
"        
return f\"โŒ Error: {str(e)}\"\n", "\n", "\n", "def get_status() -> Dict:\n", " \"\"\"Get RAG system status\"\"\"\n", " return {\n", " \"initialized\": rag_initialized,\n", " \"documents_count\": len(uploaded_documents),\n", " \"documents\": uploaded_documents,\n", " \"has_vector_store\": vectordb is not None,\n", " \"storage_path\": PDFS_PATH\n", " }\n", "\n", "\n", "# Try to load existing data\n", "print(\"๐Ÿ” Checking for existing RAG data...\")\n", "load_vector_store()\n", "\n", "print(\"\\nโœ… RAG System Ready!\")" ] }, { "cell_type": "markdown", "id": "bee976ec", "metadata": {}, "source": [ "## 5๏ธโƒฃ Admin Panel - Upload PDFs Here! ๐Ÿ“ค" ] }, { "cell_type": "code", "execution_count": 9, "id": "7fad545f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipython-input-3459415953.py:45: DeprecationWarning: The 'theme' parameter in the Blocks constructor will be removed in Gradio 6.0. You will need to pass 'theme' to Blocks.launch() instead.\n", " with gr.Blocks(title=\"RAG Admin Panel\", theme=gr.themes.Soft()) as admin_panel:\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "๐ŸŽ›๏ธ Launching Admin Panel...\n", "\n", "Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n", "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n", "* To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "application/javascript": "(async (port, path, width, height, cache, element) => {\n if (!google.colab.kernel.accessAllowed && !cache) {\n return;\n }\n element.appendChild(document.createTextNode(''));\n const url = await google.colab.kernel.proxyPort(port, {cache});\n\n const external_link = document.createElement('div');\n external_link.innerHTML = `\n
\n Running on \n https://localhost:${port}${path}\n \n
\n `;\n element.appendChild(external_link);\n\n const iframe = document.createElement('iframe');\n iframe.src = new URL(path, url).toString();\n iframe.height = height;\n iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n iframe.width = width;\n iframe.style.border = 0;\n element.appendChild(iframe);\n })(7860, \"/\", \"100%\", 500, false, window.element)", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Keyboard interruption in main thread... closing server.\n" ] }, { "data": { "text/plain": [] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [
"import gradio as gr\n",
"\n",
"def upload_pdf_handler(file):\n",
"    \"\"\"Handle PDF upload from Gradio interface.\n",
"\n",
"    Args:\n",
"        file: Value of the gr.File input. With type=\"filepath\" this is the path\n",
"            as a plain str; a file-like object exposing .name is accepted too.\n",
"\n",
"    Returns:\n",
"        Status message to show in the \"Upload Status\" box.\n",
"    \"\"\"\n",
"    if file is None:\n",
"        return \"โš ๏ธ Please select a PDF file\"\n",
"    \n",
"    # A str has no .name attribute, so only dereference it for file-like objects.\n",
"    file_path = file if isinstance(file, str) else file.name\n",
"    \n",
"    # Case-insensitive so that e.g. REPORT.PDF is accepted as well.\n",
"    if not file_path.lower().endswith('.pdf'):\n",
"        return \"โŒ Only PDF files are allowed\"\n",
"    \n",
"    filename = os.path.basename(file_path)\n",
"    result = process_uploaded_pdf(file_path, filename)\n",
"    return result\n",
"\n",
"\n",
"def test_query_handler(question, threshold):\n",
"    \"\"\"Test RAG query from admin panel\"\"\"\n",
"    if not question:\n",
"        return \"โš ๏ธ Please enter a question\"\n",
"    \n",
"    result = rag_answer(question, relevance_threshold=threshold)\n",
"    \n",
"    output = f\"\"\"**Question:** {result['question']}\n",
"**English:** {result['question_english']}\n",
"**Source:** {result['source'].upper()} ({result['relevance_score']:.3f})\n",
"\n",
"**Answer:**\n",
"{result['answer']}\n",
"\"\"\"\n",
"    return output\n",
"\n",
"\n",
"def get_status_handler():\n",
"    \"\"\"Get system status\"\"\"\n",
"    status = get_status()\n",
"    return f\"\"\"**RAG System Status:**\n",
"- Initialized: {status['initialized']}\n",
"- Documents: {status['documents_count']}\n",
"- Files: {', '.join(status['documents']) if status['documents'] else 'None'}\n",
"- Storage: 
{status['storage_path']}\n", "\"\"\"\n", "\n", "\n", "# Create Gradio Interface\n", "with gr.Blocks(title=\"RAG Admin Panel\", theme=gr.themes.Soft()) as admin_panel:\n", " gr.Markdown(\n", " \"\"\"\n", " # ๐ŸŽ›๏ธ RAG Admin Panel\n", " ### Upload PDFs and manage your RAG database\n", " \"\"\"\n", " )\n", " \n", " with gr.Tab(\"๐Ÿ“ค Upload PDFs\"):\n", " gr.Markdown(\"### Upload PDF Documents\")\n", " with gr.Row():\n", " with gr.Column():\n", " pdf_input = gr.File(\n", " label=\"Select PDF File\",\n", " file_types=[\".pdf\"],\n", " type=\"filepath\"\n", " )\n", " upload_btn = gr.Button(\"๐Ÿ“ค Upload & Process\", variant=\"primary\")\n", " with gr.Column():\n", " upload_output = gr.Textbox(\n", " label=\"Upload Status\",\n", " lines=5,\n", " interactive=False\n", " )\n", " \n", " upload_btn.click(\n", " fn=upload_pdf_handler,\n", " inputs=pdf_input,\n", " outputs=upload_output\n", " )\n", " \n", " with gr.Tab(\"๐Ÿงช Test Queries\"):\n", " gr.Markdown(\"### Test your RAG system\")\n", " with gr.Row():\n", " with gr.Column():\n", " question_input = gr.Textbox(\n", " label=\"Question (English or Sinhala)\",\n", " placeholder=\"What is a wired network?\",\n", " lines=2\n", " )\n", " threshold_slider = gr.Slider(\n", " minimum=0.5,\n", " maximum=3.0,\n", " value=2.0,\n", " step=0.1,\n", " label=\"Relevance Threshold (lower = stricter)\"\n", " )\n", " query_btn = gr.Button(\"๐Ÿ” Ask Question\", variant=\"primary\")\n", " with gr.Column():\n", " query_output = gr.Markdown(label=\"Answer\")\n", " \n", " query_btn.click(\n", " fn=test_query_handler,\n", " inputs=[question_input, threshold_slider],\n", " outputs=query_output\n", " )\n", " \n", " with gr.Tab(\"๐Ÿ“Š Status\"):\n", " gr.Markdown(\"### System Status\")\n", " status_output = gr.Markdown()\n", " status_btn = gr.Button(\"๐Ÿ”„ Refresh Status\")\n", " \n", " status_btn.click(\n", " fn=get_status_handler,\n", " outputs=status_output\n", " )\n", " \n", " # Auto-load status on startup\n", " 
admin_panel.load(fn=get_status_handler, outputs=status_output)\n",
"\n",
"# Launch admin panel\n",
"# debug=False: with debug=True launch() blocks this cell until it is interrupted\n",
"# (which also closes the server), so the API cell below could never be started\n",
"# while the admin panel is running.\n",
"print(\"\\n๐ŸŽ›๏ธ Launching Admin Panel...\\n\")\n",
"admin_panel.launch(share=False, server_name=\"127.0.0.1\", server_port=7860, debug=False)" ] }, { "cell_type": "markdown", "id": "3b658bf7", "metadata": {}, "source": [ "## 6๏ธโƒฃ Public API - Query from Anywhere! ๐ŸŒ\n", "*Note: This will run on port 8000, make sure Gradio admin panel is already running on port 7860*" ] }, { "cell_type": "code", "execution_count": null, "id": "5fd82e6d", "metadata": {}, "outputs": [], "source": [
"from fastapi import FastAPI, HTTPException, UploadFile, File\n",
"from pydantic import BaseModel\n",
"import nest_asyncio\n",
"import uvicorn\n",
"import threading\n",
"import tempfile\n",
"\n",
"# Allow nested event loops\n",
"nest_asyncio.apply()\n",
"\n",
"# Create FastAPI app\n",
"app = FastAPI(\n",
"    title=\"RAG API\",\n",
"    description=\"Query RAG database or upload PDFs via API\",\n",
"    version=\"1.0\"\n",
")\n",
"\n",
"class QuestionRequest(BaseModel):\n",
"    question: str\n",
"    threshold: float = 2.0\n",
"    translate: bool = True\n",
"\n",
"class AnswerResponse(BaseModel):\n",
"    question: str\n",
"    question_english: str\n",
"    answer: str\n",
"    source: str\n",
"    relevance_score: float\n",
"    context_found: bool\n",
"\n",
"\n",
"@app.get(\"/\")\n",
"async def root():\n",
"    return {\n",
"        \"message\": \"๐Ÿš€ RAG API is running!\",\n",
"        \"endpoints\": {\n",
"            \"POST /ask\": \"Ask a question to RAG system\",\n",
"            \"POST /upload\": \"Upload a PDF file\",\n",
"            \"GET /status\": \"Check system status\",\n",
"            \"GET /documents\": \"List uploaded documents\"\n",
"        }\n",
"    }\n",
"\n",
"\n",
"@app.post(\"/ask\", response_model=AnswerResponse)\n",
"async def ask_question(request: QuestionRequest):\n",
"    \"\"\"Ask a question to RAG system\"\"\"\n",
"    if not request.question:\n",
"        raise HTTPException(status_code=400, detail=\"Question is required\")\n",
"    \n",
"    result = rag_answer(\n",
"        
request.question,\n",
"        relevance_threshold=request.threshold,\n",
"        translate=request.translate\n",
"    )\n",
"    \n",
"    return AnswerResponse(\n",
"        question=result[\"question\"],\n",
"        question_english=result[\"question_english\"],\n",
"        answer=result[\"answer\"],\n",
"        source=result[\"source\"],\n",
"        relevance_score=result[\"relevance_score\"],\n",
"        context_found=result[\"context_found\"]\n",
"    )\n",
"\n",
"\n",
"@app.post(\"/upload\")\n",
"async def upload_pdf_api(file: UploadFile = File(...)):\n",
"    \"\"\"Upload a PDF via API.\n",
"\n",
"    Writes the upload to a temporary file, hands it to process_uploaded_pdf and\n",
"    always removes the temporary file afterwards.\n",
"\n",
"    Raises:\n",
"        HTTPException: 400 if the upload is not a named .pdf file, 500 if reading\n",
"            or processing fails unexpectedly.\n",
"    \"\"\"\n",
"    # The client controls the filename: it may be missing, and it may contain\n",
"    # directory parts, so keep only the final path component.\n",
"    safe_name = os.path.basename((file.filename or \"\").replace(\"\\\\\", \"/\"))\n",
"    if not safe_name.lower().endswith('.pdf'):\n",
"        raise HTTPException(status_code=400, detail=\"Only PDF files allowed\")\n",
"    \n",
"    temp_path = None\n",
"    try:\n",
"        # Save temporarily\n",
"        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:\n",
"            temp_path = temp_file.name\n",
"            content = await file.read()\n",
"            temp_file.write(content)\n",
"        \n",
"        # Process\n",
"        result = process_uploaded_pdf(temp_path, safe_name)\n",
"        \n",
"        return {\n",
"            \"success\": \"โœ…\" in result,\n",
"            \"message\": result,\n",
"            \"filename\": safe_name\n",
"        }\n",
"    except Exception as e:\n",
"        raise HTTPException(status_code=500, detail=str(e))\n",
"    finally:\n",
"        # Clean up temp file on every path, including failed reads or processing.\n",
"        if temp_path is not None:\n",
"            try:\n",
"                os.unlink(temp_path)\n",
"            except OSError:\n",
"                pass  # already gone or still locked; nothing useful to do here\n",
"\n",
"\n",
"@app.get(\"/status\")\n",
"async def api_status():\n",
"    \"\"\"Get RAG system status\"\"\"\n",
"    return get_status()\n",
"\n",
"\n",
"@app.get(\"/documents\")\n",
"async def list_documents():\n",
"    \"\"\"List all uploaded documents\"\"\"\n",
"    return {\n",
"        \"count\": len(uploaded_documents),\n",
"        \"documents\": uploaded_documents\n",
"    }\n",
"\n",
"\n",
"def run_server():\n",
"    \"\"\"Run the FastAPI server in a thread\"\"\"\n",
"    uvicorn.run(app, host=\"127.0.0.1\", port=8000, log_level=\"info\")\n",
"\n",
"\n",
"# Start server in background thread\n",
"server_thread = threading.Thread(target=run_server, daemon=True)\n",
"server_thread.start()\n",
"\n",
"print(\"\\n\" + \"=\"*70)\n",
"print(\"๐ŸŒ LOCAL API SERVER STARTED!\")\n", "print(\"=\"*70)\n", "print(\"\\n๐Ÿ“Œ API Endpoints:\")\n", "print(\" POST http://localhost:8000/ask - Ask a question\")\n", "print(\" POST http://localhost:8000/upload - Upload PDF\")\n", "print(\" GET http://localhost:8000/status - System status\")\n", "print(\" GET http://localhost:8000/documents - List documents\")\n", "print(\" GET http://localhost:8000/docs - API documentation\")\n", "print(\"\\n๐Ÿ’ก Example curl command:\")\n", "print(' curl -X POST \"http://localhost:8000/ask\" ^')\n", "print(' -H \"Content-Type: application/json\" ^')\n", "print(' -d \"{\\\\\"question\\\\\": \\\\\"What is a network?\\\\\", \\\\\"threshold\\\\\": 2.0}\"')\n", "print(\"\\n๐Ÿ”„ API Server is running in background...\")\n", "print(\" (Server will stop when notebook kernel is restarted)\\n\")" ] }, { "cell_type": "markdown", "id": "a8c7b576", "metadata": {}, "source": [ "---\n", "\n", "## ๐ŸŽ‰ You're Done! Here's What You Have:\n", "\n", "### โœ… Admin Panel (Cell 5)\n", "- Drag & drop PDF upload interface\n", "- Test queries in real-time\n", "- View system status\n", "- **Access at:** http://localhost:7860\n", "\n", "### โœ… Public API (Cell 6)\n", "- RESTful API endpoints\n", "- Query from any app/website\n", "- Upload PDFs programmatically\n", "- **Access at:** http://localhost:8000\n", "- **API Docs:** http://localhost:8000/docs\n", "\n", "### โœ… Local Storage\n", "- All data saved to `rag_data/` folder in your project\n", "- Survives notebook restarts\n", "- Easy to backup\n", "\n", "---\n", "\n", "## ๐Ÿ”ฅ Integration Examples:\n", "\n", "### Python:\n", "```python\n", "import requests\n", "\n", "url = \"http://localhost:8000/ask\"\n", "response = requests.post(url, json={\n", " \"question\": \"What is a wired network?\",\n", " \"threshold\": 2.0\n", "})\n", "print(response.json()['answer'])\n", "```\n", "\n", "### JavaScript:\n", "```javascript\n", "fetch('http://localhost:8000/ask', {\n", " method: 'POST',\n", " headers: { 
'Content-Type': 'application/json' },\n", " body: JSON.stringify({ \n", " question: 'What is a network?',\n", " threshold: 2.0 \n", " })\n", "})\n", ".then(r => r.json())\n", ".then(data => console.log(data.answer));\n", "```\n", "\n", "### Your Chatbot:\n", "Update your chatbot to call `http://localhost:8000/ask` instead of the old endpoint!\n", "\n", "---\n", "\n", "## ๐Ÿ“ Usage Instructions:\n", "\n", "1. **Run Cells 1-4** to setup (one time)\n", "2. **Run Cell 5** to start Admin Panel at http://localhost:7860\n", "3. **Upload PDFs** via the Admin Panel\n", "4. **Run Cell 6** to start API Server at http://localhost:8000\n", "5. **Test queries** via Admin Panel or API\n", "\n", "## ๐Ÿ› ๏ธ Troubleshooting:\n", "\n", "- **Port already in use?** Change `server_port=7860` or `port=8000` to different numbers\n", "- **Can't access?** Make sure Windows Firewall allows local connections\n", "- **Need to access from other devices?** Change `127.0.0.1` to `0.0.0.0` (security risk!)\n", "\n", "## ๐Ÿš€ Next Steps:\n", "\n", "- Upload PDFs via Admin Panel (drag & drop)\n", "- Test queries in Admin Panel\n", "- Integrate API with your chatbot app\n", "- Adjust relevance threshold as needed\n", "\n", "**Need help?** Re-run any cell to restart that component!" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.12" } }, "nbformat": 4, "nbformat_minor": 5 }