Abeshith commited on
Commit
17aafad
·
1 Parent(s): 7399728

Fix LangChain imports for 0.3.x compatibility

Browse files
.github/workflows/docker-build-push.yml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Builds the repository's Docker image and pushes it to GitHub Container
# Registry (ghcr.io) on pushes to, and pull requests against, `main`.
name: Build and Push Docker Image

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    # `packages: write` is what lets GITHUB_TOKEN push to ghcr.io.
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha

      - name: Build and push Docker image
        # The id is required: the "Image digest" step below reads
        # `steps.docker_build.outputs.digest`, which is empty without it.
        id: docker_build
        uses: docker/build-push-action@v5
        with:
          context: .
          # NOTE(review): this also pushes on pull_request events; PRs from
          # forks get a read-only token and would fail here - confirm intent.
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      - name: Image digest
        run: echo ${{ steps.docker_build.outputs.digest }}
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .env
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
Docker Guide.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Docker commands for RAG Project
2
+
3
+ ## Build the Docker image
4
+ docker build -t rag-project .
5
+
6
+ ## Run the container
7
+ docker run -d -p 8000:8000 --name rag-app \
8
+ -e GROQ_API_KEY=your_groq_api_key \
9
+ -e GOOGLE_API_KEY=your_google_api_key \
10
+ -e LANGSMITH_API_KEY=your_langsmith_api_key \
11
+ -e TAVILY_API_KEY=your_tavily_api_key \
12
+ rag-project
13
+
14
+ ## Run with .env file
15
+ docker run -d -p 8000:8000 --name rag-app --env-file .env rag-project
16
+
17
+
main.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from project.pipeline.agents import AgentWorkflow
4
+ from project.logger.logging import get_logger
5
+
6
+ load_dotenv()
7
+
8
+ logger = get_logger(__name__)
9
+
10
+
11
def setup_langsmith():
    """Enable LangSmith tracing when LANGSMITH_API_KEY is present.

    Reads LANGSMITH_API_KEY from the environment. If it is set, the four
    LANGCHAIN_* variables below are written into ``os.environ`` and an info
    message is logged; otherwise only a warning is logged and the
    environment is left untouched.
    """
    api_key = os.getenv("LANGSMITH_API_KEY")

    # Guard clause: nothing to configure without a key.
    if not api_key:
        logger.warning("LANGSMITH_API_KEY not found, tracing disabled")
        return

    tracing_settings = {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        "LANGCHAIN_API_KEY": api_key,
        "LANGCHAIN_PROJECT": "rag-corrective-pipeline",
    }
    os.environ.update(tracing_settings)
    logger.info("LangSmith tracing enabled")
21
+
22
+
23
def main():
    """Run the demo: configure tracing, build the agent, answer sample questions.

    Steps, in order: call ``setup_langsmith()``, create an ``AgentWorkflow``,
    call its ``setup`` with ``use_attention_paper=True``, save the workflow
    graph to ``workflow.png``, then run three fixed questions through
    ``agent.run`` and print each question and answer between banner rules.
    """
    rule = "=" * 80  # banner line reused by every print below

    setup_langsmith()
    logger.info("Starting RAG application...")

    agent = AgentWorkflow()

    logger.info("Setting up pipeline with Attention Is All You Need paper...")
    agent.setup(use_attention_paper=True)

    agent.save_graph("workflow.png")
    logger.info("Workflow graph saved")

    sample_questions = (
        "What is the attention mechanism in transformers?",
        "Explain the multi-head attention.",
        "What are the advantages of the transformer architecture?",
    )

    print("\n" + rule)
    print("RAG PIPELINE WITH CORRECTIVE RAG (CRAG)")
    print(rule + "\n")

    for number, query in enumerate(sample_questions, start=1):
        print(f"\n{rule}")
        print(f"Question {number}: {query}")
        print(f"{rule}\n")

        response = agent.run(query)

        print(f"\nAnswer:\n{response}\n")
        print(f"{rule}\n")

    logger.info("RAG application completed successfully")
56
+
57
+
58
+ if __name__ == "__main__":
59
+ main()
notebooks/vanilla_rag.ipynb ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "171dc240",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from dotenv import load_dotenv\n",
11
+ "import os\n",
12
+ "load_dotenv() \n",
13
+ "os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')\n",
14
+ "os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 2,
20
+ "id": "efbca25c",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "from langchain_community.document_loaders import TextLoader\n",
25
+ "\n",
26
+ "loader = TextLoader('..\\data\\state_of_the_union.txt', encoding='utf8')\n",
27
+ "documents = loader.load()"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 3,
33
+ "id": "203b53b3",
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "name": "stdout",
38
+ "output_type": "stream",
39
+ "text": [
40
+ "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n",
41
+ "\n",
42
+ "Last year COVID-19 kept us apart. This year we are finally together again. \n",
43
+ "\n",
44
+ "Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n",
45
+ "\n",
46
+ "With a duty to one another to the American people to the Constitution. \n",
47
+ "\n",
48
+ "And with an unwavering resolve that freedom will always triumph over tyranny. \n",
49
+ "\n",
50
+ "Six day\n"
51
+ ]
52
+ }
53
+ ],
54
+ "source": [
55
+ "print(documents[0].page_content[:500])"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 4,
61
+ "id": "76bdd56f",
62
+ "metadata": {},
63
+ "outputs": [],
64
+ "source": [
65
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
66
+ "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
67
+ "chunks = text_splitter.split_documents(documents)"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 10,
73
+ "id": "3fd6b5dd",
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "from langchain_community.embeddings import FastEmbedEmbeddings\n",
78
+ "embeddings = FastEmbedEmbeddings(model_name=\"BAAI/bge-small-en-v1.5\")"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 11,
84
+ "id": "9d79271e",
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "from langchain_community.vectorstores import FAISS\n",
89
+ "\n",
90
+ "vectorstore = FAISS.from_documents(chunks, embeddings)"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 13,
96
+ "id": "53ec2306",
97
+ "metadata": {},
98
+ "outputs": [],
99
+ "source": [
100
+ "retriever = vectorstore.as_retriever(search_type=\"mmr\", search_kwargs={\"k\":3})"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 14,
106
+ "id": "1c9181f3",
107
+ "metadata": {},
108
+ "outputs": [],
109
+ "source": [
110
+ "from langchain_groq import ChatGroq\n",
111
+ "llm = ChatGroq(model='openai/gpt-oss-120b', temperature=0.1)"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 15,
117
+ "id": "11181278",
118
+ "metadata": {},
119
+ "outputs": [],
120
+ "source": [
121
+ "from langchain_core.prompts import ChatPromptTemplate\n",
122
+ "\n",
123
+ "template = \"\"\"\n",
124
+ "You are a helpful AI assistant. Use the following pieces of context to answer the question at the end. \n",
125
+ "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
126
+ "Use the information to provide a concise and accurate answer.\n",
127
+ "Question: {question}\n",
128
+ "context: {context}\n",
129
+ "\"\"\"\n",
130
+ "\n",
131
+ "prompt = ChatPromptTemplate.from_template(template)"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": 16,
137
+ "id": "79752ec8",
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "from langchain_core.runnables import RunnablePassthrough\n",
142
+ "from langchain_core.output_parsers import StrOutputParser\n",
143
+ "rag_chain = (\n",
144
+ " {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
145
+ " | prompt\n",
146
+ " | llm\n",
147
+ " | StrOutputParser()\n",
148
+ ")"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 18,
154
+ "id": "5d88e579",
155
+ "metadata": {},
156
+ "outputs": [
157
+ {
158
+ "name": "stdout",
159
+ "output_type": "stream",
160
+ "text": [
161
+ "**Madam Speaker** – the title used for the presiding officer of the U.S. House of Representatives when that officer is a woman (the Speaker of the House at the time of the address).\n",
162
+ "\n",
163
+ "**What her (the address’s) speech is about** – the President’s opening remarks to the joint session of Congress. In this portion he:\n",
164
+ "\n",
165
+ "* Acknowledges the recent COVID‑19 pandemic and the fact that the nation is now gathering together again. \n",
166
+ "* Calls for bipartisan unity – Democrats, Republicans and Independents – as “Americans” first. \n",
167
+ "* Re‑affirms the nation’s commitment to the Constitution and to freedom. \n",
168
+ "* Condemns Russia’s invasion of Ukraine, describing Vladimir Putin’s attempt to “shake the foundations of the free world” and praising the courage and determination of the Ukrainian people. \n",
169
+ "\n",
170
+ "So, “Madam Speaker” is the female Speaker of the House, and the speech she is hearing focuses on national recovery, bipartisan unity, and a strong stance against Russian aggression in Ukraine.\n"
171
+ ]
172
+ }
173
+ ],
174
+ "source": [
175
+ "print(rag_chain.invoke(\"Who is Madam Speaker and What is Her Speech About?\"))"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": null,
181
+ "id": "a56e9e22",
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": []
185
+ }
186
+ ],
187
+ "metadata": {
188
+ "kernelspec": {
189
+ "display_name": "RAG Project",
190
+ "language": "python",
191
+ "name": "python3"
192
+ },
193
+ "language_info": {
194
+ "codemirror_mode": {
195
+ "name": "ipython",
196
+ "version": 3
197
+ },
198
+ "file_extension": ".py",
199
+ "mimetype": "text/x-python",
200
+ "name": "python",
201
+ "nbconvert_exporter": "python",
202
+ "pygments_lexer": "ipython3",
203
+ "version": "3.11.9"
204
+ }
205
+ },
206
+ "nbformat": 4,
207
+ "nbformat_minor": 5
208
+ }
project/pipeline/agents.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  from typing import List, Literal
3
  from typing_extensions import TypedDict
4
  from pydantic import BaseModel, Field
5
- from langchain.schema import Document
6
  from langchain_core.output_parsers import StrOutputParser
7
  from langgraph.graph import END, StateGraph, START
8
  from project.pipeline.rag import RAGPipeline
 
2
  from typing import List, Literal
3
  from typing_extensions import TypedDict
4
  from pydantic import BaseModel, Field
5
+ from langchain_core.documents import Document
6
  from langchain_core.output_parsers import StrOutputParser
7
  from langgraph.graph import END, StateGraph, START
8
  from project.pipeline.rag import RAGPipeline
project/pipeline/rag.py CHANGED
@@ -1,5 +1,5 @@
1
  from typing import List, Dict, Any
2
- from langchain.schema import Document
3
  from langchain_core.output_parsers import StrOutputParser
4
  from langchain_core.runnables import RunnablePassthrough
5
  from project.source.data_preparation import DataPreparation
 
1
  from typing import List, Dict, Any
2
+ from langchain_core.documents import Document
3
  from langchain_core.output_parsers import StrOutputParser
4
  from langchain_core.runnables import RunnablePassthrough
5
  from project.source.data_preparation import DataPreparation
pyproject.toml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "rag-project"
3
+ version = "0.1.0"
4
+ description = "Corrective RAG (CRAG) pipeline built with LangChain and LangGraph"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "arxiv>=2.3.1",
9
+ "chromadb>=1.3.5",
10
+ "faiss-cpu>=1.13.0",
11
+ "fastembed>=0.7.3",
12
+ "fastapi>=0.115.0",
13
+ "flashrank>=0.2.10",
14
+ "google-generativeai>=0.8.3",
15
+ "gradio>=6.0.1",
16
+ "ipykernel>=7.1.0",
17
+ "jinja2>=3.1.0",
18
+ "langchain>=0.3.0",
19
+ "langchain-chroma>=0.1.0",
20
+ "langchain-community>=0.3.0",
21
+ "langchain-google-genai>=2.0.5",
22
+ "langchain-groq>=0.2.0",
23
+ "langchain-mistralai>=0.2.0",
24
+ "langgraph>=0.2.0",
25
+ "pillow>=11.3.0",
26
+ "pypdf>=6.4.0",
27
+ "python-dotenv>=1.2.1",
28
+ "python-multipart>=0.0.20",
29
+ "rapidocr-onnxruntime>=1.4.4",
30
+ "structlog>=25.5.0",
31
+ "tiktoken>=0.12.0",
32
+ "uvicorn>=0.34.0",
33
+ ]
requirements.txt CHANGED
@@ -18,3 +18,5 @@ python-multipart
18
  rapidocr-onnxruntime
19
  tiktoken
20
  uvicorn
 
 
 
18
  rapidocr-onnxruntime
19
  tiktoken
20
  uvicorn
21
+
22
+ langchain-core
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
workflow.png ADDED