|
|
|
|
|
"""final_app |
|
|
Automatically generated by Colab. |
|
|
Original file is located at |
|
|
https://colab.research.google.com/drive/1pG3uDsJzglvQecdTcY76aXa5ObFadRux |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
|
import os |
|
|
import tempfile |
|
|
from langchain_community.document_loaders import PyPDFLoader |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
|
from langchain_groq import ChatGroq |
|
|
from langchain.chains import RetrievalQA |
|
|
from langchain.prompts import PromptTemplate |
|
|
|
|
|
|
|
|
# --- API configuration -------------------------------------------------------
# SECURITY: the original source hard-coded a live Groq API key on this line.
# A key committed to source control must be treated as leaked — rotate it and
# provide the replacement through the GROQ_API_KEY environment variable.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

if not GROQ_API_KEY:
    # Warn instead of raising so the UI can still start (answers will fail
    # until the key is exported).
    print("WARNING: GROQ_API_KEY is not set; question answering will fail until it is exported.")

# Keep the variable mirrored in the environment for libraries that read it there.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
|
|
|
|
|
|
|
|
# Module-level state shared by the Gradio callbacks below.
vectorstore = None        # FAISS index; built by process_pdfs(), dropped by clear_data()
processed_files_list = []  # basenames of the PDFs currently in the index
|
|
|
|
|
def process_pdfs(files):
    """Load uploaded PDFs, split them into chunks, and build the FAISS index.

    Args:
        files: list of Gradio file objects (each exposes a ``.name`` path),
            or None when nothing was uploaded.

    Returns:
        tuple[str, str]: (detailed processing status, short chat-panel status).
        The second element is empty on failure so the chat status is left
        unchanged by the UI wiring.

    Side effects:
        Rebuilds the module-level ``vectorstore`` and ``processed_files_list``.
    """
    global vectorstore, processed_files_list

    if not files:
        return "⚠️ Please upload at least one PDF file", ""

    try:
        all_documents = []
        processed_names = []

        # Load every page of every uploaded PDF.
        for file in files:
            loader = PyPDFLoader(file.name)
            documents = loader.load()
            all_documents.extend(documents)
            processed_names.append(os.path.basename(file.name))

        if not all_documents:
            return "❌ No content extracted from PDFs", ""

        # Overlapping chunks keep sentences that straddle a boundary retrievable.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        splits = text_splitter.split_documents(all_documents)

        # Small, CPU-friendly sentence-embedding model.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'}
        )

        vectorstore = FAISS.from_documents(splits, embeddings)
        processed_files_list = processed_names

        # NOTE: these status strings were split mid-literal / mojibake-garbled
        # in the exported source; reconstructed as single-line f-strings.
        success_msg = f"✅ Successfully processed {len(files)} document(s)!\n"
        success_msg += f"📊 Created {len(splits)} text chunks for retrieval\n\n"
        success_msg += "📄 Processed files:\n" + "\n".join(f"  • {name}" for name in processed_names)

        return success_msg, "✅ Documents processed! You can now ask questions."

    except Exception as e:
        # Surface the failure in the UI instead of crashing the callback.
        return f"❌ Error processing documents: {str(e)}", ""
|
|
|
|
|
def answer_question(question, chat_history):
    """Answer a question from the indexed documents and append it to the chat.

    Args:
        question: the user's question string.
        chat_history: list of [user, bot] message pairs (Gradio Chatbot format).

    Returns:
        The updated chat history. Failures are reported as bot messages rather
        than raised, so the UI callback never crashes.
    """
    global vectorstore

    if not vectorstore:
        return chat_history + [[question, "⚠️ Please upload and process PDF documents first!"]]

    if not question or question.strip() == "":
        return chat_history + [[question, "⚠️ Please enter a valid question."]]

    try:
        # temperature=0 keeps answers deterministic and grounded in the context.
        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,
            max_tokens=1024,
            api_key=GROQ_API_KEY
        )

        # Strict grounding prompt: the model must refuse when the retrieved
        # context does not contain the answer.
        prompt_template = """You are a helpful assistant that answers questions ONLY based on the provided context from uploaded PDF documents.

CRITICAL INSTRUCTIONS:
- Answer ONLY if the information is present in the context below
- If the context does not contain relevant information to answer the question, you MUST respond with: "I don't know the answer. This information is not available in the uploaded documents."
- DO NOT use any external knowledge or information not present in the context
- DO NOT make assumptions or inferences beyond what is explicitly stated in the context
- If you're unsure whether the context contains the answer, say you don't know

Context from uploaded documents:
{context}

Question: {question}

Answer (only from the context above):"""

        PROMPT = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )

        # "stuff" chain: concatenate the top-k retrieved chunks into one prompt.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={
                    "k": 5,
                    "fetch_k": 20
                }
            ),
            chain_type_kwargs={"prompt": PROMPT},
            return_source_documents=True
        )

        # .invoke() replaces the deprecated direct chain call qa_chain({...}).
        result = qa_chain.invoke({"query": question})
        answer = result['result']
        source_docs = result.get('source_documents', [])

        # Append a de-duplicated "file (page)" source list unless the model
        # declined to answer. A dict preserves insertion order, so sources are
        # listed in retrieval order (the original used an unordered set).
        if source_docs and "don't know" not in answer.lower():
            answer += "\n\n📚 **Sources found in documents:**"
            unique_sources = {}
            for doc in source_docs[:3]:
                source = doc.metadata.get('source', 'Unknown')
                page = doc.metadata.get('page', 'Unknown')
                unique_sources.setdefault(f"{source} (Page {page})", None)
            for source_id in unique_sources:
                answer += f"\n  • {source_id}"

        return chat_history + [[question, answer]]

    except Exception as e:
        return chat_history + [[question, f"❌ Error generating answer: {str(e)}"]]
|
|
|
|
|
def clear_data():
    """Drop the FAISS index, the processed-file list, and the chat history.

    Returns:
        tuple[str, str, list]: (status message, chat-panel status, empty chat
        history) written into process_output, status_text and chatbot.
    """
    global vectorstore, processed_files_list
    vectorstore = None
    processed_files_list = []
    # Emoji restored from mojibake in the exported source.
    return "🗑️ All data cleared. Please upload new documents.", "", []
|
|
|
|
|
|
|
|
# CSS injected into the Gradio page: gradient headline (#title), muted
# subtitle (#subtitle), and a centered width-capped container.
custom_css = """
#title {
    text-align: center;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
#subtitle {
    text-align: center;
    color: #666;
    font-size: 1.2em;
    margin-bottom: 20px;
}
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}
"""
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: left column = upload/processing controls, right column = chat.
# Event handlers below wire the buttons to the callbacks defined above.
# NOTE(review): several UI strings contain mojibake emoji from the Colab
# export; left byte-as-shown here since they are runtime strings.
# ---------------------------------------------------------------------------
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    # Page header, styled by custom_css via the #title / #subtitle ids.
    gr.HTML("<h1 id='title'>π Slashbyte RAG</h1>")
    gr.HTML("<p id='subtitle'>Upload PDFs and ask questions using AI-powered retrieval</p>")

    with gr.Row():

        # --- Left column: document upload and processing --------------------
        with gr.Column(scale=1):
            gr.Markdown("### π Document Upload")
            file_upload = gr.File(
                label="Upload PDF Documents",
                file_types=[".pdf"],
                file_count="multiple"
            )
            process_btn = gr.Button("π Process Documents", variant="primary", size="lg")
            process_output = gr.Textbox(
                label="Processing Status",
                lines=8,
                interactive=False
            )
            clear_btn = gr.Button("ποΈ Clear All Data", variant="stop")

            gr.Markdown("""
            ---
            ### βΉοΈ How to Use
            1. **Upload PDFs** using the file uploader
            2. Click **Process Documents**
            3. **Ask questions** in the chat
            4. Get **AI-powered answers**

            **Features:**
            - π Multiple PDF support
            - π€ Powered by Groq LLM
            - π Semantic search
            - πΎ Chat history
            """)

        # --- Right column: chat interface -----------------------------------
        with gr.Column(scale=2):
            gr.Markdown("### π¬ Ask Questions")
            status_text = gr.Textbox(
                label="Status",
                value="β οΈ Upload and process documents to start",
                interactive=False
            )
            chatbot = gr.Chatbot(
                label="Chat History",
                height=400,
                show_label=True
            )
            with gr.Row():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask anything about your documents...",
                    scale=4
                )
                submit_btn = gr.Button("π Ask", variant="primary", scale=1)

            clear_chat_btn = gr.Button("π§Ή Clear Chat")

    # Footer.
    gr.HTML("""
    <div style='text-align: center; color: #666; padding: 20px; margin-top: 20px; border-top: 1px solid #ddd;'>
        <p>Powered by Langchain, Groq, and HuggingFace | Built with β€οΈ using Gradio</p>
    </div>
    """)

    # --- Event wiring --------------------------------------------------------

    # Build/rebuild the vector store from the uploaded PDFs.
    process_btn.click(
        fn=process_pdfs,
        inputs=[file_upload],
        outputs=[process_output, status_text]
    )

    # Ask via the button; .then(...) clears the input box afterwards.
    submit_btn.click(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )

    # Ask via the Enter key — identical behaviour to the button.
    question_input.submit(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )

    # Reset only the visible chat history; the vector store is kept.
    clear_chat_btn.click(
        fn=lambda: [],
        outputs=[chatbot]
    )

    # Drop the vector store, processed-file list, and chat history.
    clear_btn.click(
        fn=clear_data,
        outputs=[process_output, status_text, chatbot]
    )
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    demo.launch(
        share=True,             # public *.gradio.live tunnel — required on Colab
        server_name="0.0.0.0",  # listen on all interfaces, not just localhost
        server_port=7860        # Gradio's conventional default port
    )