Spaces:

Abeshith
/

RAG-Project

Running

App Files Files Community

Abeshith committed on Dec 2, 2025

Commit

17aafad

1 Parent(s): 7399728

Fix LangChain imports for 0.3.x compatibility

Browse files

Files changed (12) hide show

.github/workflows/docker-build-push.yml +54 -0
.gitignore +11 -0
.python-version +1 -0
Docker Guide.md +17 -0
main.py +59 -0
notebooks/vanilla_rag.ipynb +208 -0
project/pipeline/agents.py +1 -1
project/pipeline/rag.py +1 -1
pyproject.toml +33 -0
requirements.txt +2 -0
uv.lock +0 -0
workflow.png +0 -0

.github/workflows/docker-build-push.yml ADDED Viewed

	@@ -0,0 +1,54 @@

+# CI workflow: builds the repository's Docker image and pushes it to the
+# GitHub Container Registry (ghcr.io) on pushes and pull requests to main.
+name: Build and Push Docker Image
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      # Required so GITHUB_TOKEN can publish the image to ghcr.io.
+      packages: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha
+      - name: Build and push Docker image
+        # The id is what lets the "Image digest" step read this step's outputs;
+        # without it `steps.docker_build.outputs.digest` expands to an empty string.
+        id: docker_build
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          # NOTE(review): this also pushes on pull_request events; pushes from
+          # fork PRs will presumably fail (read-only token) - confirm intent.
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+      - name: Image digest
+        run: echo ${{ steps.docker_build.outputs.digest }}

.gitignore ADDED Viewed

	@@ -0,0 +1,11 @@

+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+# Virtual environments
+.venv
+.env

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.11

Docker Guide.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Docker commands for RAG Project
+## Build the Docker image
+docker build -t rag-project .
+## Run the container
+docker run -d -p 8000:8000 --name rag-app \
+  -e GROQ_API_KEY=your_groq_api_key \
+  -e GOOGLE_API_KEY=your_google_api_key \
+  -e LANGSMITH_API_KEY=your_langsmith_api_key \
+  -e TAVILY_API_KEY=your_tavily_api_key \
+  rag-project
+## Run with .env file
+docker run -d -p 8000:8000 --name rag-app --env-file .env rag-project

main.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import os
+from dotenv import load_dotenv
+from project.pipeline.agents import AgentWorkflow
+from project.logger.logging import get_logger
+load_dotenv()
+logger = get_logger(__name__)
+def setup_langsmith():
+    langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
+    if langsmith_api_key:
+        os.environ["LANGCHAIN_TRACING_V2"] = "true"
+        os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
+        os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key
+        os.environ["LANGCHAIN_PROJECT"] = "rag-corrective-pipeline"
+        logger.info("LangSmith tracing enabled")
+    else:
+        logger.warning("LANGSMITH_API_KEY not found, tracing disabled")
+def main():
+    setup_langsmith()
+    logger.info("Starting RAG application...")
+    agent = AgentWorkflow()
+    logger.info("Setting up pipeline with Attention Is All You Need paper...")
+    agent.setup(use_attention_paper=True)
+    agent.save_graph("workflow.png")
+    logger.info("Workflow graph saved")
+    questions = [
+        "What is the attention mechanism in transformers?",
+        "Explain the multi-head attention.",
+        "What are the advantages of the transformer architecture?"
+    ]
+    print("\n" + "="*80)
+    print("RAG PIPELINE WITH CORRECTIVE RAG (CRAG)")
+    print("="*80 + "\n")
+    for i, question in enumerate(questions, 1):
+        print(f"\n{'='*80}")
+        print(f"Question {i}: {question}")
+        print(f"{'='*80}\n")
+        answer = agent.run(question)
+        print(f"\nAnswer:\n{answer}\n")
+        print(f"{'='*80}\n")
+    logger.info("RAG application completed successfully")
+if __name__ == "__main__":
+    main()

notebooks/vanilla_rag.ipynb ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "171dc240",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "import os\n",
+    "load_dotenv() \n",
+    "# load_dotenv() already copies .env values into os.environ; just fail fast\n",
+    "# with a clear message if a required key is missing.\n",
+    "for key in ('GROQ_API_KEY', 'GOOGLE_API_KEY'):\n",
+    "    if not os.getenv(key):\n",
+    "        raise EnvironmentError(f'{key} is not set; add it to your .env file')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "efbca25c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.document_loaders import TextLoader\n",
+    "\n",
+    "# Forward slashes resolve on Windows and POSIX alike and avoid the invalid\n",
+    "# '\\d' / '\\s' escape sequences of the backslash form.\n",
+    "loader = TextLoader('../data/state_of_the_union.txt', encoding='utf8')\n",
+    "documents = loader.load()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "203b53b3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.  \n",
+      "\n",
+      "Last year COVID-19 kept us apart. This year we are finally together again. \n",
+      "\n",
+      "Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n",
+      "\n",
+      "With a duty to one another to the American people to the Constitution. \n",
+      "\n",
+      "And with an unwavering resolve that freedom will always triumph over tyranny. \n",
+      "\n",
+      "Six day\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(documents[0].page_content[:500])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "76bdd56f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
+    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
+    "chunks = text_splitter.split_documents(documents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "3fd6b5dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.embeddings import FastEmbedEmbeddings\n",
+    "embeddings = FastEmbedEmbeddings(model_name=\"BAAI/bge-small-en-v1.5\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "9d79271e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.vectorstores import FAISS\n",
+    "\n",
+    "vectorstore = FAISS.from_documents(chunks, embeddings)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "53ec2306",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "retriever = vectorstore.as_retriever(search_type=\"mmr\", search_kwargs={\"k\":3})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "1c9181f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_groq import ChatGroq\n",
+    "llm = ChatGroq(model='openai/gpt-oss-120b', temperature=0.1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "11181278",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "\n",
+    "template = \"\"\"\n",
+    "You are a helpful AI assistant. Use the following pieces of context to answer the question at the end. \n",
+    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
+    "Use the information to provide a concise and accurate answer.\n",
+    "Question: {question}\n",
+    "context: {context}\n",
+    "\"\"\"\n",
+    "\n",
+    "prompt = ChatPromptTemplate.from_template(template)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "79752ec8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.runnables import RunnablePassthrough\n",
+    "from langchain_core.output_parsers import StrOutputParser\n",
+    "rag_chain = (\n",
+    "    {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
+    "    | prompt\n",
+    "    | llm\n",
+    "    | StrOutputParser()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "5d88e579",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "**Madam Speaker** – the title used for the presiding officer of the U.S. House of Representatives when that officer is a woman (the Speaker of the House at the time of the address).\n",
+      "\n",
+      "**What her (the address’s) speech is about** – the President’s opening remarks to the joint session of Congress. In this portion he:\n",
+      "\n",
+      "* Acknowledges the recent COVID‑19 pandemic and the fact that the nation is now gathering together again.  \n",
+      "* Calls for bipartisan unity – Democrats, Republicans and Independents – as “Americans” first.  \n",
+      "* Re‑affirms the nation’s commitment to the Constitution and to freedom.  \n",
+      "* Condemns Russia’s invasion of Ukraine, describing Vladimir Putin’s attempt to “shake the foundations of the free world” and praising the courage and determination of the Ukrainian people.  \n",
+      "\n",
+      "So, “Madam Speaker” is the female Speaker of the House, and the speech she is hearing focuses on national recovery, bipartisan unity, and a strong stance against Russian aggression in Ukraine.\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(rag_chain.invoke(\"Who is Madam Speaker and What is Her Speech About?\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a56e9e22",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "RAG Project",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

project/pipeline/agents.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 from typing import List, Literal
 from typing_extensions import TypedDict
 from pydantic import BaseModel, Field
-from langchain.schema import Document
 from langchain_core.output_parsers import StrOutputParser
 from langgraph.graph import END, StateGraph, START
 from project.pipeline.rag import RAGPipeline

 from typing import List, Literal
 from typing_extensions import TypedDict
 from pydantic import BaseModel, Field
+from langchain_core.documents import Document
 from langchain_core.output_parsers import StrOutputParser
 from langgraph.graph import END, StateGraph, START
 from project.pipeline.rag import RAGPipeline

project/pipeline/rag.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from typing import List, Dict, Any
-from langchain.schema import Document
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from project.source.data_preparation import DataPreparation

 from typing import List, Dict, Any
+from langchain_core.documents import Document
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from project.source.data_preparation import DataPreparation

pyproject.toml ADDED Viewed

	@@ -0,0 +1,33 @@

+[project]
+name = "rag-project"
+version = "0.1.0"
+description = "Corrective RAG (CRAG) pipeline built with LangChain and LangGraph"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "arxiv>=2.3.1",
+    "chromadb>=1.3.5",
+    "faiss-cpu>=1.13.0",
+    "fastembed>=0.7.3",
+    "fastapi>=0.115.0",
+    "flashrank>=0.2.10",
+    "google-generativeai>=0.8.3",
+    "gradio>=6.0.1",
+    "ipykernel>=7.1.0",
+    "jinja2>=3.1.0",
+    "langchain>=0.3.0",
+    "langchain-chroma>=0.1.0",
+    "langchain-community>=0.3.0",
+    "langchain-google-genai>=2.0.5",
+    "langchain-groq>=0.2.0",
+    "langchain-mistralai>=0.2.0",
+    "langgraph>=0.2.0",
+    "pillow>=11.3.0",
+    "pypdf>=6.4.0",
+    "python-dotenv>=1.2.1",
+    "python-multipart>=0.0.20",
+    "rapidocr-onnxruntime>=1.4.4",
+    "structlog>=25.5.0",
+    "tiktoken>=0.12.0",
+    "uvicorn>=0.34.0",
+]

requirements.txt CHANGED Viewed

@@ -18,3 +18,5 @@ python-multipart
 rapidocr-onnxruntime
 tiktoken
 uvicorn

 rapidocr-onnxruntime
 tiktoken
 uvicorn
+langchain-core

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

workflow.png ADDED Viewed