faiz0983 committed on
Commit
f768714
·
verified ·
1 Parent(s): 61e1f4c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -0
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_groq import ChatGroq
8
+ from langchain.chains import ConversationalRetrievalChain
9
+ from langchain.memory import ConversationBufferMemory
10
+
11
+ # --- 1. SETUP API ---
12
+ # In Hugging Face, we use os.environ to get the secret
13
+ api_key = os.environ.get("GROQ_API")
14
+
15
+ # --- 2. FILE LOADING LOGIC ---
16
+ def load_any(path: str):
17
+ p = path.lower()
18
+ if p.endswith(".pdf"): return PyPDFLoader(path).load()
19
+ if p.endswith(".txt"): return TextLoader(path, encoding="utf-8").load()
20
+ if p.endswith(".docx"): return Docx2txtLoader(path).load()
21
+ return []
22
+
23
+ # --- 3. PROCESSING FUNCTION ---
24
+ # This function runs when the user clicks "Build Chatbot"
25
+ def process_files(files):
26
+ if not files:
27
+ return None, "⚠️ Please upload at least one file."
28
+
29
+ if not api_key:
30
+ return None, "❌ Error: GROQ_API key not found in Secrets."
31
+
32
+ try:
33
+ # Load Documents
34
+ docs = []
35
+ for file_obj in files:
36
+ # Gradio passes file objects, we need their paths
37
+ docs.extend(load_any(file_obj.name))
38
+
39
+ if not docs:
40
+ return None, "⚠️ No readable text found in files."
41
+
42
+ # Split Text
43
+ splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
44
+ chunks = splitter.split_documents(docs)
45
+
46
+ # Create Embeddings & Vector Store
47
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
48
+ db = FAISS.from_documents(chunks, embeddings)
49
+ retriever = db.as_retriever(search_kwargs={"k": 4})
50
+
51
+ # Create Chain
52
+ llm = ChatGroq(
53
+ groq_api_key=api_key,
54
+ model="llama-3.3-70b-versatile",
55
+ temperature=0
56
+ )
57
+
58
+ memory = ConversationBufferMemory(
59
+ memory_key="chat_history",
60
+ return_messages=True,
61
+ output_key="answer"
62
+ )
63
+
64
+ chain = ConversationalRetrievalChain.from_llm(
65
+ llm=llm,
66
+ retriever=retriever,
67
+ memory=memory,
68
+ return_source_documents=True,
69
+ output_key="answer"
70
+ )
71
+
72
+ return chain, f"✅ Success! Processed {len(chunks)} chunks. You can chat now."
73
+
74
+ except Exception as e:
75
+ return None, f"❌ Error: {str(e)}"
76
+
77
+ # --- 4. CHAT FUNCTION ---
78
+ def chat_function(message, history, chain):
79
+ if not chain:
80
+ return "⚠️ Please upload files and click 'Build Chatbot' first."
81
+
82
+ try:
83
+ res = chain.invoke({"question": message})
84
+ answer = res["answer"]
85
+
86
+ # Format Sources
87
+ sources = []
88
+ for d in res.get("source_documents", []):
89
+ src = os.path.basename(d.metadata.get("source", "unknown"))
90
+ text = (d.page_content or "").replace("\n", " ")[:100] + "..."
91
+ sources.append(f"- {src}: {text}")
92
+
93
+ final_answer = answer + "\n\n---\n**Sources:**\n" + "\n".join(sources)
94
+ return final_answer
95
+ except Exception as e:
96
+ return f"❌ Error generating answer: {str(e)}"
97
+
98
+ # --- 5. BUILD UI ---
99
+ with gr.Blocks(title="RAG Chatbot") as demo:
100
+ gr.Markdown("# 📚 RAG Chatbot (LangChain + Groq)")
101
+
102
+ # Store the RAG chain in the user's browser session (State)
103
+ chain_state = gr.State(None)
104
+
105
+ with gr.Row():
106
+ with gr.Column(scale=1):
107
+ file_input = gr.File(file_count="multiple", label="Upload PDF/TXT/DOCX")
108
+ build_btn = gr.Button("Build Chatbot", variant="primary")
109
+ status_output = gr.Textbox(label="Status", interactive=False)
110
+
111
+ with gr.Column(scale=2):
112
+ chatbot = gr.ChatInterface(
113
+ fn=chat_function,
114
+ additional_inputs=[chain_state] # Pass the chain to the chat function
115
+ )
116
+
117
+ # Connect the "Build" button to the processing function
118
+ build_btn.click(
119
+ fn=process_files,
120
+ inputs=[file_input],
121
+ outputs=[chain_state, status_output]
122
+ )
123
+
124
+ if __name__ == "__main__":
125
+ demo.launch()