"""Gradio RAG demo: index an uploaded PDF or text file and answer questions about it.

Uploaded text is split into word chunks, embedded with a SentenceTransformer
model and stored in a FAISS index; questions are answered by a Groq-hosted
chat model that receives the best-matching chunks as context.
"""

import os
import faiss
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader

# Use Hugging Face secret for API key
# The key is read from the GROQ_API_KEY environment variable; if it is not
# set, os.environ[...] raises KeyError as soon as this module is imported.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Embedding model
# Shared by process_file() (document chunks) and retrieve() (queries), so both
# sides of the similarity search are encoded by the same model.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Global FAISS index and chunks
# Module-level retrieval state, written by process_file() and read by retrieve():
#   index  - FAISS index over the chunk embeddings; None until a file is processed
#   chunks - chunk strings; position i corresponds to the i-th vector added to `index`
index = None
chunks = []

# Chunking function
def chunk_text(text, chunk_size=200):
    """Split *text* into consecutive chunks of at most *chunk_size* words.

    Words are delimited by any run of whitespace (``str.split()``), so the
    original spacing and line breaks are not preserved; the words of each
    chunk are re-joined with single spaces.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk. Must be positive.

    Returns:
        A list of chunk strings in document order. Empty or whitespace-only
        text yields an empty list.

    Raises:
        ValueError: If *chunk_size* is zero or negative.
    """
    # A negative step makes range() empty, which would silently discard the
    # whole text; zero makes range() raise an unhelpful error. Fail loudly
    # with a clear message in both cases.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

# Load and process uploaded file
def process_file(file):
    """Read an uploaded PDF or text file and build the retrieval index for it.

    The file's text is split with ``chunk_text``, embedded with the module's
    ``embedder`` and stored in a new ``faiss.IndexFlatL2``. On success the
    module-level ``index`` and ``chunks`` are replaced together.

    Args:
        file: The uploaded file, given either as an object whose ``.name``
            attribute is the path on disk or as a plain path string. ``None``
            means nothing was uploaded.

    Returns:
        A human-readable status string (success, warning or error). Errors
        are reported in the returned string rather than raised, because the
        result is shown directly in the UI.
    """
    global index, chunks

    # Nothing uploaded: report it and leave any previously indexed document
    # untouched.
    if file is None:
        return "⚠️ Please upload a file first."

    # Clear BOTH pieces of state up front. Resetting only `chunks` would leave
    # a stale index behind if this upload fails, and a later search would then
    # return ids that do not exist in `chunks`.
    index = None
    chunks = []

    # Accept either an object exposing its path as `.name` or a plain path.
    path = getattr(file, "name", file)

    try:
        new_chunks = []
        # Case-insensitive so that e.g. "REPORT.PDF" is not read as UTF-8 text.
        if str(path).lower().endswith(".pdf"):
            reader = PdfReader(path)
            for page in reader.pages:
                text = page.extract_text()
                if text:  # skip pages with no extractable text
                    new_chunks.extend(chunk_text(text))
        else:
            with open(path, "r", encoding="utf-8") as f:
                new_chunks.extend(chunk_text(f.read()))

        if not new_chunks:
            return "⚠️ No text found in file."

        # Create embeddings and index them; the index dimension is taken from
        # the embedding matrix's second axis.
        embeddings = embedder.encode(new_chunks)
        new_index = faiss.IndexFlatL2(embeddings.shape[1])
        new_index.add(embeddings)

        # Publish only after everything succeeded, so `index` and `chunks`
        # always describe the same document.
        chunks = new_chunks
        index = new_index
        return f"✅ File processed successfully with {len(chunks)} chunks."
    except Exception as e:
        # UI boundary: turn any failure into a status message.
        return f"❌ Error processing file: {e}"

# Retriever
def retrieve(query, k=3):
    """Return the stored chunks most similar to *query*.

    Args:
        query: The question text to embed and search for.
        k: Maximum number of chunks to return.

    Returns:
        Up to *k* chunk strings in the order FAISS returned them. If no file
        has been processed yet, a single-element list containing a warning
        message is returned instead. The list is empty when there is nothing
        to return (no stored chunks or non-positive *k*).
    """
    if index is None:
        return ["⚠️ No file uploaded yet."]

    # Never ask for more neighbours than there are stored chunks.
    k = min(k, len(chunks))
    if k <= 0:
        return []

    q_emb = embedder.encode([query])
    _, neighbour_ids = index.search(q_emb, k)

    # FAISS pads missing results with -1; indexing chunks[-1] would wrongly
    # return the last chunk, so keep only ids that are valid positions.
    return [chunks[i] for i in neighbour_ids[0] if 0 <= i < len(chunks)]

# RAG pipeline
def rag_pipeline(query):
    """Answer *query* with the chat model, using retrieved chunks as context.

    Args:
        query: The user's question.

    Returns:
        The model's answer text, or a warning/error message string when the
        question is empty, no file has been processed yet, or retrieval or
        generation fails. Failures are returned as text rather than raised
        because the result is shown directly in the UI.
    """
    # Reject empty questions before doing any embedding or API work.
    if not query or not query.strip():
        return "⚠️ Please enter a question."

    # Without an index there is no context; answer directly instead of
    # sending the warning text to the model as if it were document content.
    if index is None:
        return "⚠️ No file uploaded yet."

    try:
        # Retrieval sits inside the try so embedding/search failures are
        # reported the same way as generation failures.
        retrieved = retrieve(query)
        context = "\n".join(retrieved)
        prompt = f"Answer the question using context:\n{context}\n\nQuestion: {query}\nAnswer:"
        response = client.chat.completions.create(
            # ✅ Use a supported Groq model
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content
    except Exception as e:
        # UI boundary: turn any failure into a message for the answer box.
        return f"❌ Error generating answer: {e}"

# Gradio UI with professional theme
# Structure: a Markdown header followed by one row holding two columns -
# upload/processing controls in the first (scale=1) and the question/answer
# widgets in the second (scale=2). Components are created in display order
# inside the context managers, so the statement order below is significant.
with gr.Blocks(theme=gr.themes.Monochrome(primary_hue="blue", secondary_hue="violet")) as demo:
    # Page header.
    gr.Markdown(
        """
        # 🌟 RAGify Bilal
        ### Upload your document and ask smart questions
        ---
        """
    )

    with gr.Row():
        # First column: choose a file, process it, and see the status message.
        with gr.Column(scale=1):
            file_input = gr.File(label="📂 Upload PDF or Text File", file_types=[".pdf", ".txt"])
            process_btn = gr.Button("🚀 Process File")
            status_output = gr.Textbox(label="Status", interactive=False)

        # Second column: type a question and read the generated answer.
        with gr.Column(scale=2):
            query_input = gr.Textbox(label="💬 Ask a Question")
            ask_btn = gr.Button("🔍 Get Answer")
            answer_output = gr.Textbox(label="✨ Answer", interactive=False)

    # Event wiring: each button passes its input component's value to the
    # handler and writes the returned string into the output textbox.
    process_btn.click(process_file, inputs=file_input, outputs=status_output)
    ask_btn.click(rag_pipeline, inputs=query_input, outputs=answer_output)

# Start the app. There is no __main__ guard, so this runs whenever the module
# is executed or imported.
demo.launch()