Amaanali01 committed on
Commit
698a416
Β·
verified Β·
1 Parent(s): 4ea1169

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# 🔐 Groq API key: prefer the environment variable so the secret is never
# committed to source control; the original placeholder remains the fallback.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "your_groq_api_key_here")

# 📦 Embedding model shared by document chunks and queries (must be the same
# model for both, since FAISS compares vectors in one embedding space).
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# 📂 Module-level storage for the processed document.
chunks = []   # list[str]: overlapping word-based chunks of the uploaded PDF
index = None  # faiss.IndexFlatL2 over chunk embeddings; None until a PDF is processed
18
+
19
+ # πŸ“„ PDF Text Extraction & Processing
20
+ def process_pdf(file):
21
+ global chunks, index
22
+ reader = PdfReader(file.name)
23
+ text = "\n".join(page.extract_text() or "" for page in reader.pages)
24
+
25
+ if not text.strip():
26
+ return "❌ No text found in the PDF. Please upload a different file."
27
+
28
+ # πŸ“ Chunking
29
+ chunk_size = 300
30
+ chunk_overlap = 50
31
+ words = text.split()
32
+ chunks = [
33
+ " ".join(words[i:i + chunk_size])
34
+ for i in range(0, len(words), chunk_size - chunk_overlap)
35
+ ]
36
+
37
+ # πŸ“Š Embeddings + FAISS
38
+ embeddings = model.encode(chunks)
39
+ dimension = embeddings.shape[1]
40
+ index = faiss.IndexFlatL2(dimension)
41
+ index.add(np.array(embeddings))
42
+
43
+ return f"βœ… Processed {len(chunks)} chunks from uploaded PDF. You can now ask questions."
44
+
45
# ❓ Ask a Question
def ask_question(query):
    """Answer *query* with retrieval-augmented generation over the indexed PDF.

    Retrieves the nearest chunks from the FAISS index, builds a context
    prompt, and asks the Groq-hosted Llama 3 model.

    Args:
        query: The user's question string.

    Returns:
        The model's answer, or a user-facing warning/error message string.
    """
    if not chunks or index is None:
        return "⚠️ Please upload and process a PDF first."

    query_embedding = model.encode([query])

    # FAISS pads results with -1 indices when k exceeds the number of stored
    # vectors, and chunks[-1] would then silently inject the wrong chunk —
    # clamp k and drop any negative indices defensively.
    k = min(3, len(chunks))
    distances, indices = index.search(np.array(query_embedding), k=k)
    context = "\n".join(chunks[i] for i in indices[0] if i >= 0)

    client = Groq(api_key=GROQ_API_KEY)
    prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"

    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192"
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface the API failure to the UI instead of crashing the app.
        return f"❌ Error from Groq API: {str(e)}"
65
+
66
+ # πŸŽ›οΈ Gradio Interface
67
+ file_input = gr.File(label="πŸ“„ Upload PDF")
68
+ question_input = gr.Textbox(label="❓ Ask a Question about the PDF")
69
+ answer_output = gr.Textbox(label="πŸ“˜ Answer")
70
+
71
+ pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
72
+ pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)
73
+
74
+ app = gr.TabbedInterface(
75
+ [pdf_processor, pdf_qa],
76
+ tab_names=["Upload PDF", "Ask a Question"]
77
+ )
78
+
79
+ app.launch()