lol040604lol committed on
Commit
812d945
·
verified ·
1 Parent(s): 9050fa9

Create rag_deep.py

Browse files
Files changed (1) hide show
  1. rag_deep.py +147 -0
rag_deep.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.document_loaders import PDFPlumberLoader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain_core.vectorstores import InMemoryVectorStore
5
+ from langchain_ollama import OllamaEmbeddings
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from langchain_ollama.llms import OllamaLLM
8
+
9
# Dark-theme CSS overrides for the app shell, chat widgets, file uploader and
# headings. Kept in a named constant so the injection call below stays short.
_DARK_THEME_CSS = """
<style>
.stApp {
background-color: #0E1117;
color: #FFFFFF;
}

/* Chat Input Styling */
.stChatInput input {
background-color: #1E1E1E !important;
color: #FFFFFF !important;
border: 1px solid #3A3A3A !important;
}

/* User Message Styling */
.stChatMessage[data-testid="stChatMessage"]:nth-child(odd) {
background-color: #1E1E1E !important;
border: 1px solid #3A3A3A !important;
color: #E0E0E0 !important;
border-radius: 10px;
padding: 15px;
margin: 10px 0;
}

/* Assistant Message Styling */
.stChatMessage[data-testid="stChatMessage"]:nth-child(even) {
background-color: #2A2A2A !important;
border: 1px solid #404040 !important;
color: #F0F0F0 !important;
border-radius: 10px;
padding: 15px;
margin: 10px 0;
}

/* Avatar Styling */
.stChatMessage .avatar {
background-color: #00FFAA !important;
color: #000000 !important;
}

/* Text Color Fix */
.stChatMessage p, .stChatMessage div {
color: #FFFFFF !important;
}

.stFileUploader {
background-color: #1E1E1E;
border: 1px solid #3A3A3A;
border-radius: 5px;
padding: 15px;
}

h1, h2, h3 {
color: #00FFAA !important;
}
</style>
"""

# The <style> block is raw HTML, hence unsafe_allow_html=True.
st.markdown(_DARK_THEME_CSS, unsafe_allow_html=True)
66
+
67
# Prompt text with two placeholders, {user_query} and {document_context},
# which generate_answer() fills in before calling the language model.
PROMPT_TEMPLATE = """
You are an expert research assistant. Use the provided context to answer the query.
If unsure, state that you don't know. Be concise and factual (max 3 sentences).

Query: {user_query}
Context: {document_context}
Answer:
"""

# Directory prefix under which uploaded PDFs are written.
PDF_STORAGE_PATH = 'document_store/pdfs/'

# The same Ollama model tag is used for both embeddings and generation.
_OLLAMA_MODEL_TAG = "deepseek-r1:1.5b"

EMBEDDING_MODEL = OllamaEmbeddings(model=_OLLAMA_MODEL_TAG)
DOCUMENT_VECTOR_DB = InMemoryVectorStore(EMBEDDING_MODEL)
LANGUAGE_MODEL = OllamaLLM(model=_OLLAMA_MODEL_TAG)
79
+
80
+
81
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``PDF_STORAGE_PATH`` and return its path.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getbuffer()`` (the file contents as a bytes-like
            object).

    Returns:
        The path of the written file as a string
        (``PDF_STORAGE_PATH`` joined with the sanitized file name).

    Raises:
        ValueError: If the supplied name has no usable file-name component.
    """
    import os  # local import so this block does not depend on file-level imports

    # The name is client-controlled: keep only the last path component so a
    # name such as "../../x.pdf" cannot escape the storage directory.
    # Backslashes are normalised first so Windows-style paths are covered too.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    # open() does not create missing directories; make sure the target exists.
    os.makedirs(PDF_STORAGE_PATH, exist_ok=True)

    file_path = os.path.join(PDF_STORAGE_PATH, safe_name)
    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path
86
+
87
def load_pdf_documents(file_path):
    """Load the PDF at ``file_path`` with ``PDFPlumberLoader``.

    Returns whatever ``PDFPlumberLoader(file_path).load()`` returns.
    """
    return PDFPlumberLoader(file_path).load()
90
+
91
def chunk_documents(raw_documents):
    """Split ``raw_documents`` into chunks for indexing.

    The splitter is configured with ``chunk_size=1000``,
    ``chunk_overlap=200`` and ``add_start_index=True``; the result of
    ``split_documents`` is returned unchanged.
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "add_start_index": True,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(raw_documents)
98
+
99
def index_documents(document_chunks):
    """Add ``document_chunks`` to the module-level ``DOCUMENT_VECTOR_DB``.

    The store's return value is discarded; this function returns ``None``.
    """
    vector_store = DOCUMENT_VECTOR_DB
    vector_store.add_documents(document_chunks)
101
+
102
def find_related_documents(query):
    """Return the documents ``DOCUMENT_VECTOR_DB`` ranks as similar to ``query``.

    Uses the store's default ``similarity_search`` settings.
    """
    matches = DOCUMENT_VECTOR_DB.similarity_search(query)
    return matches
104
+
105
def generate_answer(user_query, context_documents):
    """Answer ``user_query`` using the text of ``context_documents``.

    The ``page_content`` of each document is joined with blank lines and
    substituted, together with the query, into ``PROMPT_TEMPLATE``; the
    filled prompt is piped into ``LANGUAGE_MODEL`` and the model's result
    is returned.
    """
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | LANGUAGE_MODEL

    joined_context = "\n\n".join(doc.page_content for doc in context_documents)
    prompt_inputs = {
        "user_query": user_query,
        "document_context": joined_context,
    }
    return chain.invoke(prompt_inputs)
110
+
111
+
112
+ # UI Configuration
113
+
114
+
115
# Page header.
st.title("📘 DocuMind AI")
st.markdown("### Your Intelligent Document Assistant")
st.markdown("---")

# File upload section: a single PDF at a time.
uploaded_pdf = st.file_uploader(
    "Upload Research Document (PDF)",
    type="pdf",
    help="Select a PDF document for analysis",
    accept_multiple_files=False,
)

if uploaded_pdf:
    # Ingestion pipeline: save to disk -> load -> chunk -> index.
    stored_path = save_uploaded_file(uploaded_pdf)
    loaded_docs = load_pdf_documents(stored_path)
    doc_chunks = chunk_documents(loaded_docs)
    index_documents(doc_chunks)

    st.success("✅ Document processed successfully! Ask your questions below.")

    # The chat box is only offered once a document has been indexed.
    user_input = st.chat_input("Enter your question about the document...")

    if user_input:
        # Echo the question in the chat transcript.
        with st.chat_message("user"):
            st.write(user_input)

        # Retrieve related chunks, then generate the answer from them.
        with st.spinner("Analyzing document..."):
            matching_docs = find_related_documents(user_input)
            answer_text = generate_answer(user_input, matching_docs)

        with st.chat_message("assistant", avatar="🤖"):
            st.write(answer_text)