|
|
import os

import faiss
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline
|
|
|
|
|
|
|
|
# Sentence-embedding model used to vectorize both documents and queries.
# Loaded once at import time so every request reuses the same weights.
embedder = SentenceTransformer("all-MiniLM-L6-v2")


# Generative model that answers questions given retrieved context.
# NOTE(review): falcon-rw-1b is a base LM, not instruction-tuned — answers
# depend entirely on the "Context/Question/Answer" prompt format below.
qa_pipeline = pipeline("text-generation", model="tiiuae/falcon-rw-1b")


# FAISS index over document embeddings; None until process_docs() runs.
index = None


# Raw document texts, kept in the same order as vectors in `index`
# so a FAISS result row maps back to its source text by position.
docs = []
|
|
|
|
|
def process_docs(files):
    """Read uploaded text files, embed them, and (re)build the FAISS index.

    Parameters
    ----------
    files : list | None
        Gradio file objects; each exposes ``.name``, the path of a temp
        file on disk.

    Returns
    -------
    str
        Human-readable status message shown in the UI.
    """
    global index, docs
    docs = []

    # Clicking "Upload Documents" with nothing selected must not crash:
    # the original code raised IndexError on `embeddings[0]` here.
    if not files:
        index = None
        return "No files selected. Please upload at least one .txt file."

    texts = []
    for file in files:
        # errors="replace" keeps the upload working for files that are not
        # strictly valid UTF-8 instead of raising UnicodeDecodeError.
        with open(file.name, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()

        texts.append(content)
        docs.append(content)

    # FAISS requires a contiguous float32 (n, dim) matrix; depending on the
    # sentence-transformers version, encode() may return a list of arrays.
    embeddings = np.asarray(
        embedder.encode(texts, convert_to_tensor=False), dtype="float32"
    )
    dim = embeddings.shape[1]

    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)

    return "Documents uploaded and indexed!"
|
|
|
|
|
def chat(query):
    """Answer *query* with retrieval-augmented generation.

    Retrieves up to 3 nearest documents from the FAISS index, stuffs them
    into a prompt, and returns the generated answer text.

    Parameters
    ----------
    query : str
        The user's question.

    Returns
    -------
    str
        The model's answer, or a hint to upload documents first.
    """
    # Truthiness of a FAISS index object is not a documented contract;
    # compare against None explicitly.
    if index is None or not docs:
        return "Please upload documents first."

    # FAISS expects a float32 (1, dim) query matrix.
    q_embedding = np.asarray(embedder.encode([query]), dtype="float32")

    # Never request more neighbors than indexed vectors: FAISS pads the
    # result with -1 indices, and docs[-1] would silently return the
    # wrong (last) document.
    k = min(3, len(docs))
    D, I = index.search(q_embedding, k)

    # Defensive filter in case any -1 padding slips through anyway.
    context = "\n".join(docs[i] for i in I[0] if 0 <= i < len(docs))

    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"

    response = qa_pipeline(prompt, max_new_tokens=100, do_sample=False)
    # The pipeline echoes the prompt; keep only the text after "Answer:".
    return response[0]['generated_text'].split("Answer:")[-1].strip()
|
|
|
|
|
|
|
|
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    # Row 1: document upload controls and status display.
    with gr.Row():
        file_upload = gr.File(file_types=[".txt"], file_count="multiple")
        upload_btn = gr.Button("Upload Documents")
        upload_output = gr.Textbox(label="Status")

    # Row 2: question input, answer output, and the trigger button.
    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        answer = gr.Textbox(label="Answer")
        ask_btn = gr.Button("Ask")

    # Wire UI events to the handlers defined above.
    upload_btn.click(fn=process_docs, inputs=[file_upload], outputs=[upload_output])
    ask_btn.click(fn=chat, inputs=[question], outputs=[answer])


# Only start the web server when executed as a script — importing this
# module (e.g. from tests or another app) must not launch a server.
if __name__ == "__main__":
    demo.launch()
|
|
|