Aniket00 commited on
Commit
b6e8184
·
verified ·
1 Parent(s): 1b0b638

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +128 -0
  2. requirement.txt +7 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import tempfile
4
+
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from langchain_community.vectorstores import Chroma
8
+ from langchain_community.embeddings import HuggingFaceEmbeddings
9
+ from langchain_groq import ChatGroq
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from langchain_community.chat_message_histories import ChatMessageHistory
12
+ from langchain_core.chat_history import BaseChatMessageHistory
13
+
14
# Directory where the Chroma vector store is persisted between runs.
# NOTE(review): the store is rebuilt on every question in process_files;
# persistence mainly avoids losing the on-disk collection across restarts.
PERSIST_DIRECTORY = "./chroma_db"

# In-memory map of session_id -> ChatMessageHistory.
# Lives only for the lifetime of the process; histories are lost on restart.
chat_histories = {}
19
+
20
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for *session_id*, creating it on first use."""
    try:
        return chat_histories[session_id]
    except KeyError:
        history = ChatMessageHistory()
        chat_histories[session_id] = history
        return history
24
+
25
def process_files(api_key, model_name, session_id, files, question):
    """Answer *question* over the uploaded PDFs with a conversational RAG chain.

    Args:
        api_key: Groq API key (required).
        model_name: Groq model identifier to use for generation.
        session_id: key used to keep per-session chat history.
        files: uploaded PDFs from ``gr.File`` — each item may be raw bytes or
            a file path / file-like object, depending on the component's
            ``type`` setting; both are handled.
        question: the user's question about the documents.

    Returns:
        A markdown string with the assistant's answer and the recent history,
        or a plain error message when required inputs are missing.
    """
    # Validate inputs before touching any external service.
    if not api_key:
        return "Please enter your Groq API key"
    if not files:
        return "Please upload at least one PDF file"
    if not question or not question.strip():
        return "Please enter a question"

    # Initialize LLM
    llm = ChatGroq(groq_api_key=api_key, model_name=model_name)

    # Load every uploaded PDF into LangChain documents.
    documents = []
    for file in files:
        if isinstance(file, (bytes, bytearray)):
            # Raw bytes: spill to a temp file so PyPDFLoader can read a path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(file)
                tmp_path = tmp.name
            cleanup = True
        else:
            # gr.File's default type="filepath" yields a path string (or a
            # file-like object with .name) — the original code wrongly assumed
            # bytes and crashed here.
            tmp_path = file.name if hasattr(file, "name") else file
            cleanup = False
        try:
            documents.extend(PyPDFLoader(tmp_path).load())
        finally:
            if cleanup:
                os.unlink(tmp_path)  # Clean up temp file even if loading fails

    # Split and embed
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    splits = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = Chroma.from_documents(splits, embedding=embeddings, persist_directory=PERSIST_DIRECTORY)
    retriever = vectorstore.as_retriever()

    # Setup RAG chain
    rag_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
    )

    # Get chat history for this session.
    chat_history = get_session_history(session_id)

    # ConversationalRetrievalChain expects chat_history as (human, ai) pairs.
    # The original passed a flat list of strings, which breaks question
    # condensation. Messages are appended strictly as user/ai pairs below,
    # so consecutive messages pair up.
    msgs = chat_history.messages
    history_pairs = [
        (msgs[i].content, msgs[i + 1].content)
        for i in range(0, len(msgs) - 1, 2)
        if msgs[i].type == "human"
    ]

    response = rag_chain({"question": question, "chat_history": history_pairs})

    # Update history
    chat_history.add_user_message(question)
    chat_history.add_ai_message(response["answer"])

    # Format response plus the last 3 exchanges of history.
    output = f"**Assistant:** {response['answer']}\n\n---\n**Chat History:**\n"
    for msg in chat_history.messages[-6:]:
        output += f"{msg.type.capitalize()}: {msg.content}\n"

    return output
79
+
80
# Gradio Interface: left column holds configuration (API key, model,
# session id); right column holds the PDF upload, question box and output.
# Component creation order matters in gr.Blocks — it defines the layout.
with gr.Blocks(title="RAG PDF Chat") as demo:
    gr.Markdown("## 📚 Conversational RAG with PDF Uploads")

    with gr.Row():
        with gr.Column(scale=1):
            api_key = gr.Textbox(
                label="Groq API Key",
                type="password",  # never echo the key in the UI
                placeholder="Enter your API key"
            )
            model = gr.Dropdown(
                label="LLM Model",
                # Fixed: the original list contained "Gemma2-9b-It", a
                # case-duplicate of "gemma2-9b-it".
                choices=[
                    "qwen-2.5-32b",
                    "deepseek-r1-distill-llama-70b",
                    "gemma2-9b-it",
                    "mixtral-8x7b-32768",
                    "llama-3.3-70b-versatile"
                ],
                value="mixtral-8x7b-32768"
            )
            # Session id keys the in-memory chat history; users sharing the
            # default value share one history.
            session_id = gr.Textbox(
                label="Session ID",
                value="default_session"
            )

        with gr.Column(scale=2):
            file_input = gr.File(
                label="Upload PDFs",
                file_types=[".pdf"],
                file_count="multiple"
            )
            question = gr.Textbox(
                label="Your Question",
                placeholder="Ask about the uploaded documents..."
            )
            submit_btn = gr.Button("Submit")
            output = gr.Markdown()

    # Wire the button to the RAG pipeline; inputs match process_files' order.
    submit_btn.click(
        fn=process_files,
        inputs=[api_key, model, session_id, file_input, question],
        outputs=output
    )

if __name__ == "__main__":
    # NOTE(review): share=True is ignored on Hugging Face Spaces — confirm
    # whether it is needed for the target deployment.
    demo.launch(share=True)
requirement.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
gradio
langchain
langchain-community
langchain-text-splitters
langchain-groq
pypdf
chromadb
sentence-transformers