bkbilal09 committed on
Commit
819740c
·
verified ·
1 Parent(s): 67fd7e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -24
app.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  import os
2
  import faiss
3
  import gradio as gr
@@ -5,31 +13,27 @@ from groq import Groq
5
  from sentence_transformers import SentenceTransformer
6
  from pypdf import PdfReader
7
 
8
- # Use Hugging Face secret for API key
 
9
  client = Groq(api_key=os.environ["GROQ_API_KEY"])
10
 
11
- # Embedding model
12
  embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
13
 
14
- # Global FAISS index and chunks
15
  index = None
16
  chunks = []
17
  chat_history = []
18
 
19
- # Chunking with overlap
20
  def chunk_text(text, chunk_size=200, overlap=50):
21
  words = text.split()
22
- chunks = []
23
- for i in range(0, len(words), chunk_size - overlap):
24
- chunk = " ".join(words[i:i+chunk_size])
25
- chunks.append(chunk)
26
- return chunks
27
 
28
- # Load and process uploaded files
29
  def process_files(files):
30
  global index, chunks
31
  chunks = []
32
-
33
  try:
34
  for file in files:
35
  if file.name.endswith(".pdf"):
@@ -46,7 +50,6 @@ def process_files(files):
46
  if not chunks:
47
  return "⚠️ No text found in uploaded files."
48
 
49
- # Create embeddings
50
  embeddings = embedder.encode(chunks)
51
  dimension = embeddings.shape[1]
52
  index = faiss.IndexFlatL2(dimension)
@@ -56,7 +59,7 @@ def process_files(files):
56
  except Exception as e:
57
  return f"❌ Error processing files: {str(e)}"
58
 
59
- # Retriever
60
  def retrieve(query, k=3):
61
  if index is None:
62
  return ["⚠️ No files uploaded yet."]
@@ -64,7 +67,7 @@ def retrieve(query, k=3):
64
  D, I = index.search(q_emb, k)
65
  return [chunks[i] for i in I[0]]
66
 
67
- # RAG pipeline with chat history
68
  def rag_pipeline(query, model_choice):
69
  retrieved = retrieve(query)
70
  context = "\n".join(retrieved)
@@ -80,18 +83,12 @@ def rag_pipeline(query, model_choice):
80
  except Exception as e:
81
  return f"❌ Error generating answer: {str(e)}", chat_history
82
 
83
- # Gradio UI
84
  with gr.Blocks() as demo:
85
- gr.Markdown(
86
- """
87
- # 🌟 ContextPilot Bilal
88
- ### Upload multiple documents and ask optimized questions
89
- ---
90
- """
91
- )
92
 
93
  with gr.Tab("Upload Files"):
94
- file_input = gr.File(label="πŸ“‚ Upload PDF or Text Files", file_types=[".pdf", ".txt"], type="file", file_types_multiple=True)
95
  process_btn = gr.Button("πŸš€ Process Files")
96
  status_output = gr.Textbox(label="Status", interactive=False)
97
  process_btn.click(process_files, inputs=file_input, outputs=status_output)
@@ -109,5 +106,5 @@ with gr.Blocks() as demo:
109
 
110
  ask_btn.click(rag_pipeline, inputs=[query_input, model_choice], outputs=[answer_output, history_output])
111
 
112
- # ✅ Pass theme in launch (Gradio 6.0 change)
113
  demo.launch(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="violet"))
 
1
+ # ============================================
2
+ # RAGify Bilal - Optimized RAG Chatbot
3
+ # ============================================
4
+
5
+ # 1. Install dependencies (Colab only, skip in Hugging Face Spaces)
6
+ !pip install faiss-cpu gradio groq pypdf sentence-transformers
7
+
8
+ # 2. Imports
9
  import os
10
  import faiss
11
  import gradio as gr
 
13
  from sentence_transformers import SentenceTransformer
14
  from pypdf import PdfReader
15
 
16
+ # 3. Set up Groq API key (replace with your key or set via Hugging Face Secrets)
17
+ os.environ["GROQ_API_KEY"] = "YOUR_GROQ_API_KEY"
18
  client = Groq(api_key=os.environ["GROQ_API_KEY"])
19
 
20
+ # 4. Load embedding model
21
  embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
22
 
23
+ # 5. Globals
24
  index = None
25
  chunks = []
26
  chat_history = []
27
 
28
+ # 6. Chunking with overlap
29
  def chunk_text(text, chunk_size=200, overlap=50):
30
  words = text.split()
31
+ return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size - overlap)]
 
 
 
 
32
 
33
+ # 7. Process uploaded files
34
  def process_files(files):
35
  global index, chunks
36
  chunks = []
 
37
  try:
38
  for file in files:
39
  if file.name.endswith(".pdf"):
 
50
  if not chunks:
51
  return "⚠️ No text found in uploaded files."
52
 
 
53
  embeddings = embedder.encode(chunks)
54
  dimension = embeddings.shape[1]
55
  index = faiss.IndexFlatL2(dimension)
 
59
  except Exception as e:
60
  return f"❌ Error processing files: {str(e)}"
61
 
62
+ # 8. Retriever
63
  def retrieve(query, k=3):
64
  if index is None:
65
  return ["⚠️ No files uploaded yet."]
 
67
  D, I = index.search(q_emb, k)
68
  return [chunks[i] for i in I[0]]
69
 
70
+ # 9. RAG pipeline
71
  def rag_pipeline(query, model_choice):
72
  retrieved = retrieve(query)
73
  context = "\n".join(retrieved)
 
83
  except Exception as e:
84
  return f"❌ Error generating answer: {str(e)}", chat_history
85
 
86
+ # 10. Gradio UI
87
  with gr.Blocks() as demo:
88
+ gr.Markdown("# 🌟 ContextPilot Bilal\n### Upload documents and ask optimized questions")
 
 
 
 
 
 
89
 
90
  with gr.Tab("Upload Files"):
91
+ file_input = gr.File(label="πŸ“‚ Upload PDF or Text Files", file_types=[".pdf", ".txt"], type="file", file_types="multiple")
92
  process_btn = gr.Button("πŸš€ Process Files")
93
  status_output = gr.Textbox(label="Status", interactive=False)
94
  process_btn.click(process_files, inputs=file_input, outputs=status_output)
 
106
 
107
  ask_btn.click(rag_pipeline, inputs=[query_input, model_choice], outputs=[answer_output, history_output])
108
 
109
+ # ✅ Theme must be passed in launch
110
  demo.launch(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="violet"))