File size: 3,602 Bytes
b05bc7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
819740c
b05bc7f
 
 
 
 
2739118
 
 
b05bc7f
63ab0bc
 
b05bc7f
 
 
 
 
63ab0bc
b05bc7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13235f2
 
 
 
b05bc7f
a4f311f
b05bc7f
 
 
f380290
b05bc7f
 
67fd7e7
819740c
b05bc7f
 
63ab0bc
b05bc7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67fd7e7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import faiss
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader

# Groq API client. The key is read from the GROQ_API_KEY environment variable;
# the subscript lookup raises KeyError at import time if it is not set.
client = Groq(api_key=os.environ["GROQ_API_KEY"])
# Sentence-embedding model used to encode both document chunks and queries.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Module-level state shared by the functions below.
index = None  # FAISS index over the chunk embeddings; None until files are processed
chunks = []  # text chunks, in the same order as the vectors added to `index`
chat_history = []  # (question, answer) tuples appended by rag_pipeline

def chunk_text(text, chunk_size=200, overlap=50):
    """Split *text* into overlapping, whitespace-delimited word windows.

    Args:
        text: The text to split. Words are whatever ``str.split()`` yields.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared between consecutive chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings (words re-joined with single spaces). Empty
        or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. Without
            this check a zero step crashes ``range()`` with an opaque message
            and a negative step silently produces no chunks at all.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    total = len(words)
    step = chunk_size - overlap

    pieces = []
    for start in range(0, total, step):
        pieces.append(" ".join(words[start:start + chunk_size]))
        # Once a window reaches the end of the text, any further window would
        # contain only words already present in this one, so stop here.
        if start + chunk_size >= total:
            break
    return pieces

def process_files(files):
    """Extract text from uploaded files, chunk it, and rebuild the FAISS index.

    Args:
        files: A file path, a list of file paths, or ``None``/empty when
            nothing was uploaded. Paths ending in ``.pdf`` (any letter case)
            are read with ``PdfReader``; everything else is read as UTF-8 text.

    Returns:
        A human-readable status string describing success or the failure.

    Side effects:
        On success, replaces the module-level ``index`` and ``chunks``
        together. On any failure, or when no text is found, both are left
        untouched so the index and the chunk list can never get out of sync.
    """
    global index, chunks

    if not files:
        return "⚠️ No files uploaded. Please select at least one file."
    if not isinstance(files, list):
        files = [files]

    try:
        # Build into locals first; the globals are only swapped after
        # everything (including embedding and indexing) has succeeded.
        new_chunks = []
        for file in files:
            if file.lower().endswith(".pdf"):
                reader = PdfReader(file)
                for page in reader.pages:
                    text = page.extract_text()
                    if text:
                        new_chunks.extend(chunk_text(text))
            else:
                with open(file, "r", encoding="utf-8") as f:
                    new_chunks.extend(chunk_text(f.read()))

        if not new_chunks:
            return "⚠️ No text found in uploaded files."

        embeddings = embedder.encode(new_chunks)
        new_index = faiss.IndexFlatL2(embeddings.shape[1])
        new_index.add(embeddings)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"❌ Error processing files: {str(e)}"

    index, chunks = new_index, new_chunks
    return f"✅ Processed {len(files)} file(s) with {len(chunks)} chunks."

def retrieve(query, k=3):
    """Return up to *k* indexed chunks closest to *query*.

    Args:
        query: The question text to embed and search for.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings in the order FAISS returns them. If nothing
        has been indexed yet, a single-element list with a warning message is
        returned instead. A non-positive ``k`` yields an empty list.
    """
    if index is None or not chunks:
        return ["⚠️ No files uploaded yet."]

    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, which would otherwise index chunks[-1] and silently
    # return the last chunk as a bogus hit.
    top_k = min(k, len(chunks))
    if top_k < 1:
        return []

    q_emb = embedder.encode([query])
    _, neighbours = index.search(q_emb, top_k)
    return [chunks[i] for i in neighbours[0] if 0 <= i < len(chunks)]

def rag_pipeline(query, model_choice):
    """Answer *query* with the chosen Groq model using retrieved context.

    Args:
        query: The user's question.
        model_choice: Model identifier passed to the Groq chat completions API.

    Returns:
        A ``(answer, chat_history)`` tuple. ``answer`` is the model's reply,
        or a warning/error message if the query is blank or the API call
        fails. ``chat_history`` is the module-level list of
        ``(question, answer)`` tuples; it only grows on a successful answer.

    Note:
        ``chat_history`` is a single module-level list, so every caller of
        this function appends to and receives the same history.
    """
    # Do not spend an embedding pass and an API call on a blank question.
    if not query or not query.strip():
        return "⚠️ Please enter a question.", chat_history

    context = "\n".join(retrieve(query))
    prompt = f"Answer the question using context:\n{context}\n\nQuestion: {query}\nAnswer:"
    try:
        response = client.chat.completions.create(
            model=model_choice,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        answer = response.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the raw error text to help with debugging.
        return f"❌ Error generating answer: {str(e)}", chat_history

    chat_history.append((query, answer))
    return answer, chat_history

# Gradio UI: one tab to upload and index documents, one tab to ask questions.
with gr.Blocks() as demo:
    gr.Markdown("# 🌟 ContextPilot Bilal\n### Upload documents and ask optimized questions")

    with gr.Tab("Upload Files"):
        # type="filepath" hands process_files plain path strings.
        file_input = gr.File(label="📂 Upload PDF or Text Files", file_types=[".pdf", ".txt"], type="filepath")
        process_btn = gr.Button("🚀 Process Files")
        status_output = gr.Textbox(label="Status", interactive=False)
        process_btn.click(process_files, inputs=file_input, outputs=status_output)

    with gr.Tab("Ask Questions"):
        query_input = gr.Textbox(label="💬 Enter your question")
        # NOTE(review): confirm these model ids are still served by Groq;
        # hosted model lists change and retired ids fail at request time.
        model_choice = gr.Dropdown(
            choices=["llama-3.1-8b-instant", "llama-3.1-70b-versatile", "gemma-7b-it"],
            value="llama-3.1-8b-instant",
            label="Choose Groq Model"
        )
        ask_btn = gr.Button("🔍 Get Answer")
        answer_output = gr.Textbox(label="✨ Answer", interactive=False)
        history_output = gr.Chatbot(label="📜 Chat History")

        # rag_pipeline returns (answer, chat_history), matching the two outputs.
        ask_btn.click(rag_pipeline, inputs=[query_input, model_choice], outputs=[answer_output, history_output])

# NOTE(review): whether `theme` is accepted by launch() or must be passed to
# gr.Blocks() depends on the installed Gradio major version; confirm against
# the pinned version.
demo.launch(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="violet"))