Spaces:
Runtime error
Runtime error
File size: 9,953 Bytes
4a70e25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 |
# -*- coding: utf-8 -*-
"""final_app
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1pG3uDsJzglvQecdTcY76aXa5ObFadRux
"""
# !pip install gradio langchain langchain-community langchain-huggingface langchain-groq faiss-cpu sentence-transformers pypdf
import gradio as gr
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
# Groq API key.
# Read from the environment (e.g. a Hugging Face Space secret or a local
# `export GROQ_API_KEY=...`) so the credential never lives in source control.
# NOTE: the key that used to be hard-coded here is exposed and must be revoked.
# An empty value makes the Groq client fail with an authentication error the
# first time a question is asked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# Global state shared by the Gradio callbacks.
vectorstore = None  # FAISS index over the uploaded PDFs; None until documents are processed
processed_files_list = []  # base names of the PDFs that make up the current index
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
_embeddings = None  # created on first use, then reused for every upload


def _get_embeddings():
    """Return the shared embedding model, loading it the first time it is needed."""
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(
            model_name=_EMBEDDING_MODEL_NAME,
            model_kwargs={'device': 'cpu'},
        )
    return _embeddings


def process_pdfs(files):
    """Load the uploaded PDFs, chunk them, and build the global FAISS index.

    Args:
        files: The value of the upload component: a sequence of file paths
            or of objects exposing the path as ``.name``. May be empty/None.

    Returns:
        A ``(processing_report, status_line)`` tuple of strings. On any
        failure the report carries the error text and the status line is
        empty; exceptions are never propagated to the UI.

    Side effects:
        On success, replaces the module-level ``vectorstore`` and
        ``processed_files_list``. They are left untouched on failure.
    """
    global vectorstore, processed_files_list

    if not files:
        return "⚠️ Please upload at least one PDF file", ""

    try:
        all_documents = []
        processed_names = []

        for file in files:
            # Uploads arrive either as plain path strings or as wrappers that
            # carry the path in `.name`; accept both.
            pdf_path = getattr(file, "name", file)
            all_documents.extend(PyPDFLoader(pdf_path).load())
            processed_names.append(os.path.basename(pdf_path))

        if not all_documents:
            return "❌ No content extracted from PDFs", ""

        # Split documents into overlapping chunks for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        splits = text_splitter.split_documents(all_documents)

        if not splits:
            # Pages without a text layer (e.g. scans) produce no chunks; stop
            # here instead of handing the vector store an empty list.
            return "❌ No content extracted from PDFs", ""

        # Build the index first and only then publish it, so a failure above
        # never leaves the globals half-updated.
        vectorstore = FAISS.from_documents(splits, _get_embeddings())
        processed_files_list = processed_names

        file_lines = "\n".join(f" • {name}" for name in processed_names)
        success_msg = (
            f"✅ Successfully processed {len(files)} document(s)!\n"
            f"📊 Created {len(splits)} text chunks for retrieval\n\n"
            f"📄 Processed files:\n{file_lines}"
        )
        return success_msg, "✅ Documents processed! You can now ask questions."

    except Exception as e:
        # UI boundary: report the problem in the status box rather than crash.
        return f"❌ Error processing documents: {str(e)}", ""
# Prompt that restricts the model to the retrieved context only.
_QA_PROMPT_TEMPLATE = """You are a helpful assistant that answers questions ONLY based on the provided context from uploaded PDF documents.
CRITICAL INSTRUCTIONS:
- Answer ONLY if the information is present in the context below
- If the context does not contain relevant information to answer the question, you MUST respond with: "I don't know the answer. This information is not available in the uploaded documents."
- DO NOT use any external knowledge or information not present in the context
- DO NOT make assumptions or inferences beyond what is explicitly stated in the context
- If you're unsure whether the context contains the answer, say you don't know
Context from uploaded documents:
{context}
Question: {question}
Answer (only from the context above):"""


def _format_sources(source_docs, limit=3):
    """Render the first *limit* retrieved chunks as a de-duplicated bullet list."""
    labels = []
    for doc in source_docs[:limit]:
        source = doc.metadata.get('source', 'Unknown')
        page = doc.metadata.get('page', 'Unknown')
        if isinstance(page, int):
            page += 1  # PyPDFLoader numbers pages from 0; show 1-based pages
        # Show the file name only, not the server-side temp path.
        label = f"{os.path.basename(str(source))} (Page {page})"
        if label not in labels:  # keep retrieval order, drop repeats
            labels.append(label)
    return "".join(f"\n • {label}" for label in labels)


def answer_question(question, chat_history):
    """Answer *question* from the indexed PDFs and append the turn to the chat.

    Args:
        question: The user's question text.
        chat_history: Existing chat as a list of ``[user, assistant]`` pairs
            (``None`` is treated as an empty history).

    Returns:
        A new history list with one ``[question, reply]`` pair appended. The
        reply is the model's answer (plus a source list when the model did
        not decline), or a warning/error message; exceptions are reported in
        the chat instead of being raised.
    """
    chat_history = chat_history or []

    if vectorstore is None:
        return chat_history + [[question, "⚠️ Please upload and process PDF documents first!"]]

    if not question or not question.strip():
        return chat_history + [[question, "⚠️ Please enter a valid question."]]

    try:
        # Temperature 0 keeps the answers as deterministic and factual as possible.
        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,
            max_tokens=1024,
            api_key=GROQ_API_KEY,
        )

        prompt = PromptTemplate(
            template=_QA_PROMPT_TEMPLATE,
            input_variables=["context", "question"],
        )

        # "stuff" chain: the retrieved chunks are placed directly into the prompt.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={
                    "k": 5,  # Retrieve top 5 most relevant chunks
                    "fetch_k": 20,  # Fetch more candidates before filtering
                },
            ),
            chain_type_kwargs={"prompt": prompt},
            return_source_documents=True,
        )

        # `.invoke` replaces the deprecated direct call `qa_chain({...})`.
        result = qa_chain.invoke({"query": question})
        answer = result['result']
        source_docs = result.get('source_documents', [])

        # Cite sources only when the model actually answered.
        if source_docs and "don't know" not in answer.lower():
            answer += "\n\n📚 **Sources found in documents:**"
            answer += _format_sources(source_docs)

        return chat_history + [[question, answer]]

    except Exception as e:
        # UI boundary: surface the failure in the chat rather than crash.
        error_msg = f"❌ Error generating answer: {str(e)}"
        return chat_history + [[question, error_msg]]
def clear_data():
    """Drop the current index and file list and reset the UI outputs.

    Returns:
        A ``(processing_report, status_line, chat_history)`` tuple: a
        confirmation message, an empty status string, and an empty chat.

    Side effects:
        Resets the module-level ``vectorstore`` to ``None`` and
        ``processed_files_list`` to an empty list.
    """
    global vectorstore, processed_files_list
    vectorstore = None
    processed_files_list = []
    return "🗑️ All data cleared. Please upload new documents.", "", []
# Custom CSS for better styling.
# Passed to gr.Blocks(css=...): styles the gradient page title (#title), the
# grey subtitle (#subtitle), and caps/centres the overall app container.
custom_css = """
#title {
text-align: center;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 2.5em;
font-weight: bold;
margin-bottom: 10px;
}
#subtitle {
text-align: center;
color: #666;
font-size: 1.2em;
margin-bottom: 20px;
}
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
}
"""
# Create Gradio interface.
# Layout: header, then a two-column row (upload/processing on the left, chat
# on the right), then a footer. Event wiring must stay inside the Blocks
# context so the listeners are registered on `demo`.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Header
    gr.HTML("<h1 id='title'>📚 Slashbyte RAG</h1>")
    gr.HTML("<p id='subtitle'>Upload PDFs and ask questions using AI-powered retrieval</p>")

    with gr.Row():
        # Left column - Document Upload
        with gr.Column(scale=1):
            gr.Markdown("### 📁 Document Upload")
            file_upload = gr.File(
                label="Upload PDF Documents",
                file_types=[".pdf"],
                file_count="multiple"
            )
            process_btn = gr.Button("🔄 Process Documents", variant="primary", size="lg")
            process_output = gr.Textbox(
                label="Processing Status",
                lines=8,
                interactive=False
            )
            clear_btn = gr.Button("🗑️ Clear All Data", variant="stop")
            gr.Markdown("""
            ---
            ### ℹ️ How to Use
            1. **Upload PDFs** using the file uploader
            2. Click **Process Documents**
            3. **Ask questions** in the chat
            4. Get **AI-powered answers**

            **Features:**
            - 📄 Multiple PDF support
            - 🤖 Powered by Groq LLM
            - 🔍 Semantic search
            - 💾 Chat history
            """)

        # Right column - Chat Interface
        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            status_text = gr.Textbox(
                label="Status",
                value="⚠️ Upload and process documents to start",
                interactive=False
            )
            chatbot = gr.Chatbot(
                label="Chat History",
                height=400,
                show_label=True
            )
            with gr.Row():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask anything about your documents...",
                    scale=4
                )
                submit_btn = gr.Button("🚀 Ask", variant="primary", scale=1)
            clear_chat_btn = gr.Button("🧹 Clear Chat")

    # Footer
    gr.HTML("""
    <div style='text-align: center; color: #666; padding: 20px; margin-top: 20px; border-top: 1px solid #ddd;'>
    <p>Powered by Langchain, Groq, and HuggingFace | Built with ❤️ using Gradio</p>
    </div>
    """)

    # Event handlers
    process_btn.click(
        fn=process_pdfs,
        inputs=[file_upload],
        outputs=[process_output, status_text]
    )

    # Clicking "Ask" and pressing Enter in the question box do the same thing:
    # answer the question, then empty the input box.
    for ask_trigger in (submit_btn.click, question_input.submit):
        ask_trigger(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[chatbot]
        ).then(
            lambda: "",
            outputs=[question_input]
        )

    clear_chat_btn.click(
        fn=lambda: [],
        outputs=[chatbot]
    )
    clear_btn.click(
        fn=clear_data,
        outputs=[process_output, status_text, chatbot]
    )
# Launch the app (only when executed as a script, not when imported).
if __name__ == "__main__":
    # Listen on all interfaces at port 7860 and request a public share link.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)