Spaces:
Build error
Build error
File size: 4,580 Bytes
b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 4bceb17 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 b314b52 cd607e1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
# Dark-theme CSS overrides for the app shell, chat input, chat messages,
# avatars, the file uploader and headings. The stylesheet is injected as raw
# HTML, which is why unsafe_allow_html=True is required on the call below.
_CUSTOM_THEME_CSS = """
<style>
.stApp {
background-color: #0E1117;
color: #FFFFFF;
}
/* Chat Input Styling */
.stChatInput input {
background-color: #1E1E1E !important;
color: #FFFFFF !important;
border: 1px solid #3A3A3A !important;
}
/* User Message Styling */
.stChatMessage[data-testid="stChatMessage"]:nth-child(odd) {
background-color: #1E1E1E !important;
border: 1px solid #3A3A3A !important;
color: #E0E0E0 !important;
border-radius: 10px;
padding: 15px;
margin: 10px 0;
}
/* Assistant Message Styling */
.stChatMessage[data-testid="stChatMessage"]:nth-child(even) {
background-color: #2A2A2A !important;
border: 1px solid #404040 !important;
color: #F0F0F0 !important;
border-radius: 10px;
padding: 15px;
margin: 10px 0;
}
/* Avatar Styling */
.stChatMessage .avatar {
background-color: #00FFAA !important;
color: #000000 !important;
}
/* Text Color Fix */
.stChatMessage p, .stChatMessage div {
color: #FFFFFF !important;
}
.stFileUploader {
background-color: #1E1E1E;
border: 1px solid #3A3A3A;
border-radius: 5px;
padding: 15px;
}
h1, h2, h3 {
color: #00FFAA !important;
}
</style>
"""
st.markdown(_CUSTOM_THEME_CSS, unsafe_allow_html=True)
# Prompt handed to the language model. The {user_query} and
# {document_context} placeholders are filled in by generate_answer().
PROMPT_TEMPLATE = """
You are an expert research assistant. Use the provided context to answer the query.
If unsure, state that you don't know. Be concise and factual (max 3 sentences).
Query: {user_query}
Context: {document_context}
Answer:
"""

# Prefix applied to the uploaded file's name when it is written to disk.
# Empty means the file is written relative to the current working directory.
PDF_STORAGE_PATH = ""

# One Ollama model tag is used for both embedding and text generation.
_OLLAMA_MODEL_NAME = "deepseek-r1:1.5b"

EMBEDDING_MODEL = OllamaEmbeddings(model=_OLLAMA_MODEL_NAME)
# In-memory store holding the embedded document chunks.
DOCUMENT_VECTOR_DB = InMemoryVectorStore(EMBEDDING_MODEL)
LANGUAGE_MODEL = OllamaLLM(model=_OLLAMA_MODEL_NAME)
def save_uploaded_file(uploaded_file):
    """Write an uploaded file under ``PDF_STORAGE_PATH`` and return its path.

    Only the final path component of ``uploaded_file.name`` is used. The name
    is supplied by the client, so any directory parts in it (``../``, an
    absolute path, Windows-style separators) are discarded to keep the write
    inside the storage location.

    Args:
        uploaded_file: Object exposing a ``name`` string and a
            ``getbuffer()`` method returning the file's bytes.

    Returns:
        The path (``str``) the bytes were written to.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    import os  # function-scope import keeps this block self-contained

    # Normalise backslashes first so basename() also strips Windows-style
    # directory components when running on POSIX.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))

    # With an empty PDF_STORAGE_PATH, join() returns the bare file name,
    # which matches the previous plain concatenation.
    file_path = os.path.join(PDF_STORAGE_PATH, safe_name)

    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path
def load_pdf_documents(file_path):
    """Load the PDF at *file_path* with ``PDFPlumberLoader``.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        Whatever ``PDFPlumberLoader(file_path).load()`` returns.
    """
    return PDFPlumberLoader(file_path).load()
def chunk_documents(raw_documents):
    """Split *raw_documents* with a ``RecursiveCharacterTextSplitter``.

    The splitter is configured with a chunk size of 1000, an overlap of 200,
    and ``add_start_index=True``.

    Args:
        raw_documents: Documents to split (as produced by
            ``load_pdf_documents``).

    Returns:
        The result of the splitter's ``split_documents`` call.
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "add_start_index": True,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(raw_documents)
def index_documents(document_chunks):
    """Add *document_chunks* to the module-level ``DOCUMENT_VECTOR_DB``.

    Args:
        document_chunks: Chunks to add to the vector store.

    Returns:
        None. The result of ``add_documents`` is intentionally not returned.
    """
    vector_store = DOCUMENT_VECTOR_DB
    vector_store.add_documents(document_chunks)
def find_related_documents(query):
    """Run a similarity search for *query* against ``DOCUMENT_VECTOR_DB``.

    Args:
        query: The search text.

    Returns:
        The result of ``DOCUMENT_VECTOR_DB.similarity_search(query)``.
    """
    matches = DOCUMENT_VECTOR_DB.similarity_search(query)
    return matches
def generate_answer(user_query, context_documents):
    """Answer *user_query* with the language model, using the given context.

    The ``page_content`` of every context document is joined with blank
    lines and substituted, together with the query, into ``PROMPT_TEMPLATE``.
    The filled prompt is then piped into ``LANGUAGE_MODEL``.

    Args:
        user_query: The question to answer.
        context_documents: Iterable of objects exposing ``page_content``.

    Returns:
        The result of invoking the prompt-to-model chain.
    """
    page_texts = (document.page_content for document in context_documents)
    joined_context = "\n\n".join(page_texts)

    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | LANGUAGE_MODEL

    prompt_inputs = {
        "user_query": user_query,
        "document_context": joined_context,
    }
    return chain.invoke(prompt_inputs)
# UI Configuration
st.title("📘 DocuMind AI")
st.markdown("### Your Intelligent Document Assistant")
st.markdown("---")

# File Upload Section
uploaded_pdf = st.file_uploader(
    "Upload Research Document (PDF)",
    type="pdf",
    help="Select a PDF document for analysis",
    accept_multiple_files=False,
)

if uploaded_pdf:
    # Ingestion pipeline: write the upload to disk, parse the PDF, split it
    # into chunks, then add the chunks to the vector store.
    saved_path = save_uploaded_file(uploaded_pdf)
    raw_docs = load_pdf_documents(saved_path)
    processed_chunks = chunk_documents(raw_docs)
    index_documents(processed_chunks)

    # The message must be a single-line literal; a line break inside the
    # quotes is an unterminated string.
    st.success("✅ Document processed successfully! Ask your questions below.")

    # The chat section sits under the upload check because the vector store
    # is only populated once a document has been processed.
    user_input = st.chat_input("Enter your question about the document...")

    if user_input:
        # Echo the question in the chat transcript.
        with st.chat_message("user"):
            st.write(user_input)

        # Retrieval and generation both happen while the spinner is shown.
        with st.spinner("Analyzing document..."):
            relevant_docs = find_related_documents(user_input)
            ai_response = generate_answer(user_input, relevant_docs)

        with st.chat_message("assistant", avatar="🤖"):
            st.write(ai_response)