| import os |
| import faiss |
| import gradio as gr |
| from groq import Groq |
| from sentence_transformers import SentenceTransformer |
| from pypdf import PdfReader |
|
|
| |
# Groq chat-completions client. The API key is read from the environment when
# the module is loaded; a missing GROQ_API_KEY raises KeyError here.
client: "Groq" = Groq(api_key=os.environ["GROQ_API_KEY"])

# Sentence-embedding model used both for indexing chunks and for encoding
# queries, so the two always share one vector space.
embedder: "SentenceTransformer" = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Retrieval state, replaced by process_file(): the FAISS index and the text
# chunks whose positions match the index's vector ids.
index: "faiss.Index | None" = None
chunks: "list[str]" = []
|
|
| |
def chunk_text(text, chunk_size=200):
    """Split *text* into consecutive chunks of at most *chunk_size* words.

    Words are the whitespace-separated tokens produced by ``str.split()``;
    each chunk re-joins its words with single spaces, so the original spacing
    and line breaks are not preserved.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk. Must be at least 1.

    Returns:
        A list of chunk strings in document order. Empty or whitespace-only
        text yields an empty list.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    # A negative step would make range() empty and silently drop every word,
    # and a zero step raises an unrelated-looking range() error; reject both
    # with a message that names the real problem.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
|
|
| |
def process_file(file):
    """Build the retrieval index from an uploaded PDF or plain-text file.

    The document is split into word chunks with ``chunk_text``, embedded with
    the module-level ``embedder`` and stored in a new FAISS ``IndexFlatL2``.
    The module-level ``index`` and ``chunks`` are cleared at the start and are
    only set again, together, once the new index is complete.

    Args:
        file: The uploaded file: an object exposing its local path as
            ``.name``, a path given as ``str``/``os.PathLike``, or ``None``
            when nothing was uploaded. Paths ending in ``.pdf`` (any case)
            are read with ``PdfReader``; anything else is read as UTF-8 text.

    Returns:
        A status message for the UI. Failures are reported in the message
        instead of being raised.
    """
    global index, chunks

    # Forget the previous document first. Clearing only ``chunks`` would leave
    # an old index whose vector ids point into a different (or empty) list.
    index = None
    chunks = []

    if file is None:
        return "⚠️ Please upload a file first."

    try:
        # Accept a bare path as well as an object carrying the path in
        # ``.name``. The isinstance check comes first because a pathlib.Path
        # also has ``.name``, but there it is only the final path component.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name

        new_chunks = []
        if path.lower().endswith(".pdf"):
            for page in PdfReader(path).pages:
                text = page.extract_text()
                if text:  # skip pages with no extractable text
                    new_chunks.extend(chunk_text(text))
        else:
            with open(path, "r", encoding="utf-8") as f:
                new_chunks.extend(chunk_text(f.read()))

        if not new_chunks:
            return "⚠️ No text found in file."

        embeddings = embedder.encode(new_chunks)
        new_index = faiss.IndexFlatL2(embeddings.shape[1])
        new_index.add(embeddings)

        # Publish both halves of the retrieval state in one step.
        index, chunks = new_index, new_chunks
        return f"✅ File processed successfully with {len(new_chunks)} chunks."
    except Exception as e:
        # UI boundary: any parsing or embedding failure becomes a status line.
        return f"❌ Error processing file: {e}"
|
|
| |
def retrieve(query, k=3):
    """Return up to *k* indexed chunks nearest to *query*.

    The query is embedded with the module-level ``embedder`` and searched
    against the module-level FAISS ``index``; the resulting ids are mapped
    back to entries of ``chunks``.

    Args:
        query: The question text to embed.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings in the order FAISS returned them. If no
        document has been indexed, a one-element list holding a warning
        message. If *k* is not positive or the index is empty, an empty list.
    """
    if index is None:
        return ["⚠️ No file uploaded yet."]

    # FAISS pads the id array with -1 when it holds fewer than k vectors, and
    # chunks[-1] would quietly return the last chunk as a bogus hit. Never ask
    # for more neighbours than exist.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    query_embedding = embedder.encode([query])
    _, neighbour_ids = index.search(query_embedding, k)
    # Bounds-check anyway so a padded or out-of-range id can never be used.
    return [chunks[i] for i in neighbour_ids[0] if 0 <= i < len(chunks)]
|
|
| |
def rag_pipeline(query):
    """Answer *query* with the Groq chat model, using retrieved chunks as context.

    The chunks returned by ``retrieve`` are joined with newlines and placed in
    a single user message sent to the ``llama-3.1-8b-instant`` model.

    Args:
        query: The user's question.

    Returns:
        The model's answer text, or a warning/error message for the UI when
        the question is blank, no document has been indexed, or retrieval or
        the API call fails.
    """
    if not query or not query.strip():
        return "⚠️ Please enter a question."

    # Without an index, retrieve() returns only a warning string. Show that
    # to the user instead of sending it to the model as if it were context.
    if index is None:
        return "⚠️ No file uploaded yet."

    try:
        # Retrieval sits inside the try so that its failures are reported in
        # the answer box the same way API failures are.
        context = "\n".join(retrieve(query))
        prompt = f"Answer the question using context:\n{context}\n\nQuestion: {query}\nAnswer:"
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content
    except Exception as e:
        # UI boundary: report the failure as the answer text.
        return f"❌ Error generating answer: {e}"
|
|
| |
# Gradio UI: the left column uploads and indexes a document, the right column
# asks questions against it.
#
# NOTE(review): the leading glyph in the heading and in the upload, process
# and "Get Answer" labels is a mis-decoded emoji whose original code point
# cannot be recovered from this text; restore those from the author's copy.
# The "Ask a Question" and "Answer" labels were recoverable and are repaired.
with gr.Blocks(theme=gr.themes.Monochrome(primary_hue="blue", secondary_hue="violet")) as demo:
    gr.Markdown(
        """
        # ๐ RAGify Bilal
        ### Upload your document and ask smart questions
        ---
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="๐ Upload PDF or Text File", file_types=[".pdf", ".txt"])
            process_btn = gr.Button("๐ Process File")
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column(scale=2):
            query_input = gr.Textbox(label="💬 Ask a Question")
            ask_btn = gr.Button("๐ Get Answer")
            answer_output = gr.Textbox(label="✨ Answer", interactive=False)

    # Wire each button to its handler.
    process_btn.click(process_file, inputs=file_input, outputs=status_output)
    ask_btn.click(rag_pipeline, inputs=query_input, outputs=answer_output)


# Start the server only when run as a script, so importing this module (for
# tests or tooling) does not launch a blocking web server.
if __name__ == "__main__":
    demo.launch()
|
|