Shipmaster1 committed on
Commit
5451a57
·
verified ·
1 Parent(s): 65b508e

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/How-to-Build-a-Career-in-AI.pdf filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,5 +1,11 @@
1
  import gradio as gr
2
  import os
 
 
 
 
 
 
3
 
4
  def load_notebook():
5
  notebook_path = "Pythonic_RAG_Assignment.ipynb"
@@ -32,5 +38,102 @@ with gr.Blocks() as demo:
32
  3. Run: `jupyter notebook Pythonic_RAG_Assignment.ipynb`
33
  """)
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  if __name__ == "__main__":
 
36
  demo.launch()
 
1
  import gradio as gr
2
  import os
3
+ from aimakerspace.text_utils import PDFLoader, CharacterTextSplitter
4
+ from aimakerspace.vectordatabase import VectorDatabase
5
+ from aimakerspace.openai_utils.prompts import SystemRolePrompt, UserRolePrompt
6
+ from aimakerspace.openai_utils.chatmodel import ChatOpenAI
7
+ from aimakerspace.openai_utils.embedding import EmbeddingModel
8
+ import asyncio
9
 
10
  def load_notebook():
11
  notebook_path = "Pythonic_RAG_Assignment.ipynb"
 
38
  3. Run: `jupyter notebook Pythonic_RAG_Assignment.ipynb`
39
  """)
40
 
41
# Initialize the RAG pipeline
def initialize_rag():
    """Build and return the retrieval-augmented QA pipeline.

    Loads the source PDF, splits it into overlapping chunks, embeds the
    chunks into a vector database, and wires that database to a chat model.

    Returns:
        RetrievalAugmentedQAPipeline: a pipeline ready for run_pipeline() calls.
    """
    # Load the source document
    pdf_loader = PDFLoader("data/How-to-Build-a-Career-in-AI.pdf")
    documents = pdf_loader.load_documents()

    # Overlapping chunks so retrieval keeps local context across boundaries
    text_splitter = CharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
    split_documents = text_splitter.split_texts(documents)

    # Embed all chunks; the async build is driven to completion here
    embedding_model = EmbeddingModel()
    vector_db = VectorDatabase(embedding_model=embedding_model)
    vector_db = asyncio.run(vector_db.abuild_from_list(split_documents))

    # NOTE(review): the original also constructed SystemRolePrompt /
    # UserRolePrompt objects here, but they were never used — the pipeline
    # builds its own prompts inside run_pipeline(). That dead code is removed.
    chat_openai = ChatOpenAI()

    return RetrievalAugmentedQAPipeline(vector_db_retriever=vector_db, llm=chat_openai)
80
+
81
class RetrievalAugmentedQAPipeline:
    """Retrieval-augmented QA pipeline.

    Retrieves the chunks most similar to a query from the vector store and
    asks the LLM to answer using only that retrieved context.
    """

    # Prompt templates are class-level constants so they are defined once,
    # not re-created as string literals on every run_pipeline() call.
    _SYSTEM_TEMPLATE = """ \
Use the provided context to answer the user's query.
You may not answer the user's query unless there is specific context in the following text.
If you do not know the answer, or cannot answer, please respond with "I don't know".
"""

    _USER_TEMPLATE = """ \
Context:
{context}

User Query:
{user_query}
"""

    def __init__(self, llm: ChatOpenAI, vector_db_retriever: VectorDatabase) -> None:
        """
        Args:
            llm: chat model used to generate the final answer.
            vector_db_retriever: vector store exposing search_by_text().
        """
        self.llm = llm
        self.vector_db_retriever = vector_db_retriever

    def run_pipeline(self, user_query: str) -> str:
        """Answer *user_query* using the top-4 retrieved chunks as context."""
        # search_by_text returns (text, score) pairs; only the text is used.
        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
        context_prompt = "".join(context[0] + "\n" for context in context_list)

        formatted_system_prompt = SystemRolePrompt(self._SYSTEM_TEMPLATE).create_message()
        formatted_user_prompt = UserRolePrompt(self._USER_TEMPLATE).create_message(
            user_query=user_query, context=context_prompt
        )

        response = self.llm.run([formatted_system_prompt, formatted_user_prompt])
        return response
108
+
109
# Create Gradio interface
def create_interface():
    """Assemble the Gradio UI and bind it to the RAG pipeline.

    Returns:
        gr.Blocks: the demo, ready for .launch().
    """
    # Build the pipeline up front so the first question answers quickly.
    rag_pipeline = initialize_rag()

    def query_rag(question):
        # Thin adapter between the Gradio callback and the pipeline.
        return rag_pipeline.run_pipeline(question)

    with gr.Blocks(title="RAG Implementation") as demo:
        gr.Markdown("# RAG Implementation Demo")
        gr.Markdown("Ask questions about the 'How to Build a Career in AI' document")

        with gr.Row():
            with gr.Column():
                question = gr.Textbox(label="Your Question", placeholder="Type your question here...")
                submit_btn = gr.Button("Submit")

            with gr.Column():
                answer = gr.Textbox(label="Answer", lines=5)

        # Wire the button to the pipeline adapter.
        submit_btn.click(fn=query_rag, inputs=question, outputs=answer)

    return demo
136
+
137
if __name__ == "__main__":
    # Build the UI and start the Gradio server.
    app = create_interface()
    app.launch()
data/How-to-Build-a-Career-in-AI.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f510e0cf2d5a646f5d6fc253c153ef0a129a598bd3cab8379d395d535026332d
3
+ size 3717673
data/PMarcaBlogs.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,11 +1,14 @@
1
  jupyter
2
  notebook
3
  ipykernel
4
- numpy
5
  pandas
6
  matplotlib
7
  scikit-learn
8
  transformers
9
  datasets
10
  huggingface_hub
11
- openai
 
 
 
 
1
  jupyter
2
  notebook
3
  ipykernel
4
+ numpy>=1.24.0
5
  pandas
6
  matplotlib
7
  scikit-learn
8
  transformers
9
  datasets
10
  huggingface_hub
11
+ openai>=1.0.0
12
+ python-dotenv>=1.0.0
13
+ aimakerspace>=0.1.0
14
+ # NOTE: asyncio is part of the Python standard library — do not pip-install
+ # the PyPI 'asyncio' package (an obsolete 3.3-era backport that can shadow
+ # the stdlib module on modern interpreters).