| import os |
| import pdfplumber |
| from sentence_transformers import SentenceTransformer |
| import faiss |
| import numpy as np |
| from gpt4all import GPT4All |
|
|
| |
| |
| |
# Folder with the source PDFs; pdf_texts holds one extracted string per file.
papers_folder = "papers"
pdf_texts = []

# sorted() makes processing (and therefore chunk/index) order deterministic —
# os.listdir order is filesystem-dependent.
for filename in sorted(os.listdir(papers_folder)):
    # Case-insensitive match so ".PDF" files are not silently skipped.
    if not filename.lower().endswith(".pdf"):
        continue
    path = os.path.join(papers_folder, filename)
    # Collect page texts and join once: avoids quadratic str += in the loop.
    parts = []
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for image-only pages; skip those.
            page_text = page.extract_text()
            if page_text:
                parts.append(page_text + "\n")
    pdf_texts.append("".join(parts))


print(f"Loaded {len(pdf_texts)} PDFs.")
|
|
| |
| |
| |
def chunk_text(text, chunk_size=500, overlap=0):
    """Split *text* into whitespace-delimited word chunks.

    Parameters
    ----------
    text : str
        Raw document text.
    chunk_size : int
        Maximum number of words per chunk. Must be positive.
    overlap : int
        Number of words shared between consecutive chunks; 0 (the
        default) reproduces the original non-overlapping behavior.
        Must satisfy 0 <= overlap < chunk_size.

    Returns
    -------
    list[str]
        The chunks in order; an empty list for empty/whitespace text.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
    words = text.split()
    # Step by (chunk_size - overlap) so consecutive windows share `overlap` words.
    step = chunk_size - overlap
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]
|
|
# Flatten every document into one list of word-chunks ready for embedding.
chunks = [chunk for doc_text in pdf_texts for chunk in chunk_text(doc_text)]


print(f"Total text chunks: {len(chunks)}")
|
|
| |
| |
| |
# Sentence-embedding model (MiniLM: small, fast, 384-dim vectors), applied
# to every chunk in one batched call.
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = embedding_model.encode(chunks, show_progress_bar=True)


# Exact (brute-force) L2 nearest-neighbour index over the chunk embeddings.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.asarray(embeddings))


print("FAISS index created with all chunks.")
|
|
| |
| |
| |
# Path to the local GGUF model. Override with the GPT4ALL_MODEL_PATH env var
# instead of editing this hard-coded, per-machine default.
_model_path = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    r"C:\Users\ACER\Desktop\Llama-3.2-3B-Instruct-Q4_0.gguf",
)
gpt_model = GPT4All(_model_path)
print("GPT4All model loaded.")
|
|
| |
| |
| |
def ask_question(query, top_k=3):
    """Answer *query* with retrieval-augmented generation.

    Embeds the query, retrieves the *top_k* nearest chunks from the
    FAISS index, and asks the local GPT4All model to answer using only
    the retrieved context.

    Parameters
    ----------
    query : str
        The user's question.
    top_k : int
        Number of chunks to retrieve as context.

    Returns
    -------
    str
        The model's generated answer.
    """
    # Embed the query with the same model used for the corpus chunks.
    query_vec = embedding_model.encode([query])

    # FAISS nearest-neighbour search; row 0 of `indices` holds the hits.
    distances, indices = index.search(np.array(query_vec), top_k)

    # FAISS pads `indices` with -1 when the index holds fewer than top_k
    # vectors; without the filter, chunks[-1] would silently inject the
    # last chunk as bogus context.
    context = "\n".join(chunks[i] for i in indices[0] if i >= 0)

    prompt = f"Answer the question using ONLY the information from the following context:\n{context}\n\nQuestion: {query}\nAnswer:"

    # temp=0.7 keeps some variety; lower it for more deterministic answers.
    response = gpt_model.generate(prompt, max_tokens=200, temp=0.7)
    return response
|
|
| |
| |
| |
print("\nResearch-Paper Chatbot (type 'exit' to quit)")
while True:
    # Treat Ctrl-D / Ctrl-C as a quit request instead of dumping a traceback.
    try:
        question = input("\nYour question: ")
    except (EOFError, KeyboardInterrupt):
        break
    # strip() so "exit " (trailing whitespace) still quits.
    if question.strip().lower() in ["exit", "quit"]:
        break
    # Skip blank input rather than querying the index with an empty string.
    if not question.strip():
        continue
    answer = ask_question(question)
    print("\nAnswer:\n", answer)