duythduong committed on
Commit
2376236
·
0 Parent(s):

feat: define corebase

Browse files
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Environments
2
+ .env
3
+ .venv
4
+ env/
5
+ venv/
6
+ ENV/
7
+ env.bak/
8
+ venv.bak/
9
+
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.8-slim-buster
3
+
4
+ # Set the working directory in the container to /app
5
+ WORKDIR /app
6
+
7
+ # Add the current directory contents into the container at /app
8
+ ADD . /app
9
+
10
+ # Install any needed packages specified in requirements.txt
11
+ RUN pip install --no-cache-dir -r requirements.txt
12
+
13
+ # Make port 8000 available to the world outside this container
14
+ EXPOSE 8000
15
+
16
+ # Serve the FastAPI app (main:app) with uvicorn when the container launches
17
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
LeaveNoContextBehind.pdf ADDED
Binary file (482 kB). View file
 
README.md ADDED
File without changes
apis/configs/llm_configs.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_google_genai import ChatGoogleGenerativeAI
4
+
5
+
6
+ gpt_model = ChatOpenAI(api_key=os.environ.get('OPENAI_API_KEY'), temperature=0,
7
+ request_timeout=120, streaming=True, model="gpt-3.5-turbo-0125")
8
+ gemini_model = ChatGoogleGenerativeAI(api_key=os.environ.get(
9
+ 'GOOGLE_API_KEY'), temperature=0, model="gemini-pro", request_timeout=120)
apis/configs/word_embedding_config.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from langchain_community.embeddings import HuggingFaceEmbeddings
2
+
3
+
4
+ mxbai_embedder = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
main.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uvicorn
3
+ from apis import api_v1_router
4
+ from apis.create_app import create_app
5
+ from dotenv import load_dotenv, find_dotenv
6
+
7
+ # Load environment variables from the `.env` file
8
+ load_dotenv(find_dotenv())
9
+ # Create FastAPI app instance
10
+ app = create_app()
11
+
12
+
13
+ # Add routes
14
+ app.include_router(api_v1_router, prefix="/api")
15
+
16
+
17
+ # Launch FastAPI app
18
+ if __name__ == "__main__":
19
+ uvicorn.run(app, host="0.0.0.0", port=os.environ.get("PORT", 7860))
test.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import ChatGoogleGenerativeAI
2
+ from dotenv import load_dotenv
3
+ import os
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain import hub
6
+ from langchain_chroma import Chroma
7
+ from langchain_community.document_loaders import WebBaseLoader
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.runnables import RunnablePassthrough
10
+ from langchain_openai import OpenAIEmbeddings
11
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
12
+ from langchain_community.embeddings import HuggingFaceEmbeddings
13
+
14
+
15
+ mxbai_embedder = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
16
+
17
+ load_dotenv()
18
+
19
+ llm = ChatGoogleGenerativeAI(google_api_key=os.environ.get("GOOGLE_API_KEY"),
20
+ model="gemini-1.5-pro-latest")
21
+
22
+ # Load and split the PDF document into pages
23
+ pdf_loader = PyPDFLoader("LeaveNoContextBehind.pdf")
24
+ pages = pdf_loader.load_and_split()
25
+
26
+ # Split the pages into smaller chunks
27
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
28
+ splits = text_splitter.split_documents(pages)
29
+
30
+ # Create a vector store from the document splits
31
+ vectorstore = Chroma.from_documents(documents=splits, embedding=mxbai_embedder)
32
+
33
+ # Retrieve and generate using the relevant snippets of the PDF document
34
+ retriever = vectorstore.as_retriever()
35
+ prompt = hub.pull("rlm/rag-prompt")
36
+
37
+ def format_docs(docs):
38
+ return "\n\n".join(doc.page_content for doc in docs)
39
+
40
+ # Define the RAG chain
41
+ rag_chain = (
42
+ {"context": retriever | format_docs, "question": RunnablePassthrough()}
43
+ | prompt
44
+ | llm
45
+ | StrOutputParser()
46
+ )
47
+
48
+ # Invoke the RAG chain with a question
49
+ response = rag_chain.invoke("Can you summarize the document?")
50
+ print(response)