Dani786 committed on
Commit
722f7a0
·
verified ·
1 Parent(s): 6d05c60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -23
app.py CHANGED
@@ -1,17 +1,18 @@
1
  import os
2
- import fitz
3
  import faiss
4
  import numpy as np
5
  import gradio as gr
6
  from groq import Groq
7
  from sentence_transformers import SentenceTransformer
8
 
9
- # === SET YOUR GROQ API KEY HERE ===
10
- os.environ["GROQ_API_KEY"] = "sk-your_actual_key_here"
11
  client = Groq(api_key=os.environ["GROQ_API_KEY"])
 
 
12
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
13
 
14
- # === PDF → Text ===
15
  def extract_text_from_pdf(pdf_path):
16
  text = ""
17
  with fitz.open(pdf_path) as doc:
@@ -19,7 +20,7 @@ def extract_text_from_pdf(pdf_path):
19
  text += page.get_text()
20
  return text
21
 
22
- # === Chunking ===
23
  def chunk_text(text, chunk_size=500):
24
  sentences = text.split(". ")
25
  chunks, current = [], ""
@@ -33,7 +34,7 @@ def chunk_text(text, chunk_size=500):
33
  chunks.append(current.strip())
34
  return chunks
35
 
36
- # === Embedding + FAISS ===
37
  class VectorStore:
38
  def __init__(self):
39
  self.index = faiss.IndexFlatL2(384)
@@ -44,14 +45,14 @@ class VectorStore:
44
  self.chunks.extend(texts)
45
 
46
  def search(self, query, top_k=5):
47
- query_vec = embedding_model.encode([query])
48
- D, I = self.index.search(np.array(query_vec), top_k)
49
  return [self.chunks[i] for i in I[0]]
50
 
51
  vs = VectorStore()
52
- system_prompt = "You are a study supervisor helping students understand their documents."
53
 
54
- # === Groq LLaMA 3 Inference ===
55
  def ask_llama3(system_prompt, user_prompt):
56
  try:
57
  result = client.chat.completions.create(
@@ -65,7 +66,7 @@ def ask_llama3(system_prompt, user_prompt):
65
  except Exception as e:
66
  return f"❌ Groq API Error: {e}"
67
 
68
- # === Gradio Logic ===
69
  def upload_pdf(pdf_file):
70
  try:
71
  text = extract_text_from_pdf(pdf_file.name)
@@ -74,31 +75,32 @@ def upload_pdf(pdf_file):
74
  vs.add(embeddings, chunks)
75
  return "✅ Document uploaded and processed!"
76
  except Exception as e:
77
- return f"❌ Error in PDF processing: {e}"
78
 
 
79
  def ask_question(question):
80
  if not vs.chunks:
81
- return "⚠️ Please upload a document first."
82
  try:
83
  docs = vs.search(question)
84
  context = "\n".join(docs)
85
- user_prompt = f"Use this context to answer the question:\n\n{context}\n\nQuestion: {question}"
86
- return ask_llama3(system_prompt, user_prompt)
87
  except Exception as e:
88
- return f"❌ Error during question answering: {e}"
89
 
90
  # === Gradio UI ===
91
  with gr.Blocks() as demo:
92
- gr.Markdown("## 📚 RAG PDF QA with LLaMA3 + Groq")
93
  with gr.Row():
94
- pdf_file = gr.File(label="Upload PDF")
95
  upload_button = gr.Button("Process PDF")
96
  with gr.Row():
97
- user_question = gr.Textbox(label="Ask your question here")
98
- submit_button = gr.Button("Ask")
99
- answer_box = gr.Textbox(label="Answer", lines=5)
100
 
101
- upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer_box)
102
- submit_button.click(ask_question, inputs=user_question, outputs=answer_box)
103
 
104
  demo.launch()
 
1
  import os
2
+ import fitz # PyMuPDF
3
  import faiss
4
  import numpy as np
5
  import gradio as gr
6
  from groq import Groq
7
  from sentence_transformers import SentenceTransformer
8
 
9
+ # ✅ Load Groq API key from Hugging Face Secrets
 
10
  client = Groq(api_key=os.environ["GROQ_API_KEY"])
11
+
12
+ # ✅ Sentence embedding model
13
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
14
 
15
+ # === PDF → Text extraction ===
16
  def extract_text_from_pdf(pdf_path):
17
  text = ""
18
  with fitz.open(pdf_path) as doc:
 
20
  text += page.get_text()
21
  return text
22
 
23
+ # === Chunking text ===
24
  def chunk_text(text, chunk_size=500):
25
  sentences = text.split(". ")
26
  chunks, current = [], ""
 
34
  chunks.append(current.strip())
35
  return chunks
36
 
37
+ # === Vector store (FAISS) ===
38
  class VectorStore:
39
  def __init__(self):
40
  self.index = faiss.IndexFlatL2(384)
 
45
  self.chunks.extend(texts)
46
 
47
  def search(self, query, top_k=5):
48
+ vec = embedding_model.encode([query])
49
+ _, I = self.index.search(np.array(vec), top_k)
50
  return [self.chunks[i] for i in I[0]]
51
 
52
  vs = VectorStore()
53
+ system_prompt = "You are a study supervisor helping students understand their uploaded documents."
54
 
55
+ # === Ask LLaMA 3 using Groq ===
56
  def ask_llama3(system_prompt, user_prompt):
57
  try:
58
  result = client.chat.completions.create(
 
66
  except Exception as e:
67
  return f"❌ Groq API Error: {e}"
68
 
69
+ # === PDF upload handler ===
70
  def upload_pdf(pdf_file):
71
  try:
72
  text = extract_text_from_pdf(pdf_file.name)
 
75
  vs.add(embeddings, chunks)
76
  return "✅ Document uploaded and processed!"
77
  except Exception as e:
78
+ return f"❌ PDF Processing Error: {e}"
79
 
80
+ # === QA handler ===
81
  def ask_question(question):
82
  if not vs.chunks:
83
+ return "⚠️ Please upload and process a PDF document first."
84
  try:
85
  docs = vs.search(question)
86
  context = "\n".join(docs)
87
+ prompt = f"Use the context below to answer the question.\n\nContext:\n{context}\n\nQuestion: {question}"
88
+ return ask_llama3(system_prompt, prompt)
89
  except Exception as e:
90
+ return f"❌ Question Answering Error: {e}"
91
 
92
  # === Gradio UI ===
93
  with gr.Blocks() as demo:
94
+ gr.Markdown("## 📚 RAG PDF QA using LLaMA3 via Groq API")
95
  with gr.Row():
96
+ pdf_file = gr.File(label="Upload PDF Document")
97
  upload_button = gr.Button("Process PDF")
98
  with gr.Row():
99
+ question = gr.Textbox(label="Ask a question from the document")
100
+ ask_button = gr.Button("Ask")
101
+ answer = gr.Textbox(label="Answer", lines=6)
102
 
103
+ upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer)
104
+ ask_button.click(ask_question, inputs=question, outputs=answer)
105
 
106
  demo.launch()