ommore86 commited on
Commit
b4bcb01
·
1 Parent(s): 995aa4f

initial commit

Browse files
.gitattributes CHANGED
@@ -1,35 +1,2 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim base keeps the image small; 3.11 supports the app's modern typing
# (dict[str, ...] annotations need 3.9+).
FROM python:3.11-slim

WORKDIR /app

# Install dependencies BEFORE copying the source tree so Docker's layer
# cache skips the (slow) pip step when only application code changes.
# Previously `COPY . /app` came first, invalidating the cache on every edit.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

# Hugging Face Spaces routes traffic to port 7860.
ENV PORT=7860

EXPOSE 7860

CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn app:app
README.md CHANGED
@@ -1,10 +1,30 @@
1
- ---
2
- title: Legal Docs AI
3
- emoji: 👀
4
- colorFrom: gray
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🧠 RAG-based Research Paper Chatbot
2
+
3
+ After multiple failed prototypes and experiments with different LLMs, I’m excited to share a working version of my PDF chatbot web app — where users can upload a research paper or any PDF and chat with an AI model that gives contextually relevant answers based on the document contents!
4
+
5
+ ### 💡 What it does:
6
+ Upload any research paper (or PDF), and the app will let you chat with it intelligently. It breaks down the PDF into chunks, creates embeddings, indexes them, and uses a language model to generate answers from the document content. Perfect for students, researchers, or curious minds who want to understand dense PDFs faster!
7
+
8
+ ## 🚀 Live Demo
9
+ [Click here](https://huggingface.co/spaces/ommore86/research-paper-chatbot) to see the live demo of the web app!
10
+
11
+ ## 📸 Screenshots
12
+ ![Homepage](homepage.png)
13
+ ![ChatPage-1](chatpage.png)
14
+ ![ChatPage-1](chatpage2.png)
15
+ ![TechStack](tech_stack.png)
16
+
17
+ ## 🛠️ Tech Stack & Tools:
18
+ - <b>Frontend:</b> TailwindCSS
19
+ - <b>Backend:</b> Python + Flask
20
+ - <b>PDF Processing:</b> LangChain
21
+ - <b>Embeddings:</b> all-MiniLM-L6-v2 (HuggingFace)
22
+ - <b>Vector Store:</b> FAISS (for fast similarity search)
23
+ - <b>LLM:</b> llama-3.1-8b-instant — "after trying with other similar models like Mixtral and llama-3.3-70b-versatile"
24
+ - <b>Deployment:</b> Hugging Face Spaces (Earlier I used Vercel → then Render → but RAM limits made me shift to HF Spaces, and it’s been a game changer!)
25
+
26
+ ## Publication
27
+ I have made the publication of this project on readytensor.ai. [Click here](https://app.readytensor.ai/publications/rag-based-research-paper-chatbot-D2mnoaeo4HhR) and do check it out!
28
+
29
+ ## 🤝 Let's Connect!
30
+ If you’d like to collaborate, give feedback, or just say hi — don’t hesitate to reach out or connect with me on [LinkedIn](https://www.linkedin.com/in/om-more-b802b2281/).
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, uuid, gc
2
+ from functools import lru_cache
3
+
4
+ from flask import Flask, render_template, request, jsonify
5
+ from werkzeug.utils import secure_filename
6
+ from dotenv import load_dotenv
7
+ import PyPDF2
8
+
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain_community.vectorstores import FAISS
11
+ # from langchain_huggingface import (
12
+ # HuggingFaceEmbeddings,
13
+ # HuggingFaceEndpoint,
14
+ # ChatHuggingFace,
15
+ # )
16
+ from langchain_huggingface import HuggingFaceEmbeddings
17
+ from langchain_groq import ChatGroq
18
+ from langchain.chains import RetrievalQA
19
+
20
# Load variables (e.g. the Groq API key) from a local .env file, if present.
load_dotenv()

# /tmp is the writable location on the Hugging Face Spaces container
# (see Dockerfile / README: the app is deployed on HF Spaces).
UPLOAD_FOLDER = os.path.join("/tmp", "uploads")
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Redirect Hugging Face caches into /tmp as well — presumably the default
# cache path is not writable in the Spaces container (TODO confirm).
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
os.environ["HF_DATASETS_CACHE"] = "/tmp/huggingface/datasets"


app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER

# In-memory registry of QA chains, keyed by uploaded file id.
# NOTE(review): grows without bound and is lost on process restart —
# acceptable for a demo Space, but entries are never evicted.
qa_chains: dict[str, RetrievalQA] = {}
32
+
33
@lru_cache(maxsize=1)
def get_embedder():
    """Return the sentence-embedding model, constructing it only once.

    The lru_cache ensures the MiniLM weights are loaded lazily on the
    first call and then reused for every subsequent PDF upload.
    """
    model = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model)
38
+
39
@lru_cache(maxsize=1)
def get_llm():
    """Return the Groq chat model used to answer questions (built once).

    Alternatives previously tried: llama-3.3-70b-versatile (very accurate),
    mistral-saba-24b (unlimited), and a HuggingFaceEndpoint-hosted Mixtral.
    """
    return ChatGroq(
        model_name="llama-3.1-8b-instant",
        temperature=0.3,
        max_tokens=512,
    )
53
+
54
def extract_text_from_pdf(path: str) -> str:
    """Concatenate the extracted text of every page of the PDF at *path*.

    Pages where PyPDF2 cannot extract text (extract_text() returns None)
    contribute an empty string instead of raising.
    """
    pieces: list[str] = []
    with open(path, "rb") as fh:
        for page in PyPDF2.PdfReader(fh).pages:
            pieces.append(page.extract_text() or "")
    return "".join(pieces)
58
+
59
def build_qa_for_pdf(path: str) -> RetrievalQA:
    """Index the PDF at *path* and return a RetrievalQA chain over it."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    pieces = splitter.split_text(extract_text_from_pdf(path))

    index = FAISS.from_texts(pieces, get_embedder())
    # search_type="mmr" (maximal marginal relevance) with the top k=5 chunks.
    return RetrievalQA.from_chain_type(
        llm=get_llm(),
        retriever=index.as_retriever(search_type="mmr", search_kwargs={"k": 5}),
    )
67
+
68
@app.route("/healthz")
def healthz():
    """Liveness probe: always answers 200 so the host knows we're up."""
    return ("ok", 200)
71
+
72
@app.route("/", methods=["GET"])
def index():
    """Serve the single-page chat UI."""
    page = "index.html"
    return render_template(page)
75
+
76
@app.post("/upload")
def upload_file():
    """Accept a PDF upload, index it, and return its file_id.

    Responses:
        200 {"file_id": ...}  — PDF indexed; use file_id with /ask.
        400 {"error": ...}    — missing file or non-.pdf extension.
        500 {"error": ...}    — text extraction / indexing failed.
    """
    file = request.files.get("file")
    if not file or not file.filename.lower().endswith(".pdf"):
        return jsonify({"error": "Please upload a valid PDF."}), 400

    # uuid prefix prevents collisions between identically named uploads.
    filename = f"{uuid.uuid4()}_{secure_filename(file.filename)}"
    path = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    file.save(path)

    try:
        qa_chains[filename] = build_qa_for_pdf(path)
        return jsonify({"file_id": filename}), 200
    except Exception as e:
        return jsonify({"error": f"Error building chain: {e}"}), 500
    finally:
        # The FAISS index holds everything the chain needs in memory, so the
        # PDF can be removed. Previously every upload was left in /tmp/uploads
        # forever, leaking disk on the RAM/disk-limited Space.
        try:
            os.remove(path)
        except OSError:
            pass
91
+
92
@app.post("/ask")
def ask_question():
    """Answer *question* against the previously uploaded PDF *file_id*.

    Expects JSON {"file_id": ..., "question": ...}; returns
    {"answer": ...} on success or {"error": ...} with 400/404/500.
    """
    payload = request.get_json(force=True)
    file_id = payload.get("file_id")
    question = (payload.get("question") or "").strip()

    if not (file_id and question):
        return jsonify({"error": "Missing file_id or question."}), 400

    chain = qa_chains.get(file_id)
    if chain is None:
        return jsonify({"error": "File not found or expired."}), 404

    try:
        result = chain.invoke({"query": question})
        answer = result["result"] if isinstance(result, dict) else result
        gc.collect()  # nudge memory back down on the RAM-limited host
        return jsonify({"answer": answer}), 200
    except Exception as e:
        return jsonify({"error": f"Error: {e}"}), 500
112
+
113
if __name__ == "__main__":
    # Local development entry point only — in the Docker image the app is
    # served by gunicorn (see Dockerfile CMD), so debug=True never ships.
    app.run(debug=True)
pdf_extractor.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Scratch/experiment script: build a RetrievalQA chain over one local PDF
("IEEEpaper.pdf") using a Hugging Face-hosted LLM, and ask it a question.

Runs top-to-bottom on import/execution; it is a prototype, not part of the
Flask app (app.py uses Groq instead).
"""
import os

import PyPDF2
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import (
    ChatHuggingFace,
    HuggingFaceEmbeddings,
    HuggingFaceEndpoint,
)

# SECURITY: the Hugging Face API token must come from the environment
# (.env / secrets manager), never from source. A real token was previously
# hard-coded on this line and committed — that token must be revoked.
HF_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]


def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*."""
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() may return None for image-only pages; previously
        # that made the `+=` concatenation raise TypeError. Treat as "".
        return ''.join(page.extract_text() or '' for page in reader.pages)


pdf_text = extract_text_from_pdf("IEEEpaper.pdf")

# Split into overlapping chunks so each embedding stays within model limits.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_text(pdf_text)

# Embed the chunks and index them for similarity search.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_texts(chunks, embedding)

# Remote LLM endpoint hosted on Hugging Face.
hf_llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-alpha",
    temperature=0.5,
    max_new_tokens=512,
    huggingfacehub_api_token=HF_TOKEN,
)

llm = ChatHuggingFace(llm=hf_llm)

qa = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())
response = qa.run("Who is the author?")

# print("Total Chunks:", len(chunks))
# print("First Chunk:", chunks[0] if chunks else "No chunks extracted")
requirements.txt ADDED
Binary file (3.4 kB). View file
 
templates/android-chrome-192x192.png ADDED
templates/android-chrome-512x512.png ADDED
templates/apple-touch-icon.png ADDED
templates/bot_logo.png ADDED
templates/favicon-16x16.png ADDED
templates/favicon-32x32.png ADDED
templates/favicon.ico ADDED
templates/index.html ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>PDF Chatbot</title>
8
+ <script src="https://cdn.tailwindcss.com"></script>
9
+ <link rel="icon" href="favicon.ico" type="image/x-icon">
10
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.7.2/css/all.min.css"
11
+ referrerpolicy="no-referrer">
12
+ </head>
13
+
14
+ <body class="m-0 bg-white">
15
+ <div class="bg-gradient-to-b from-indigo-200 via-white to-indigo-100">
16
+ <div class="bg-gradient-to-b from-indigo-50 via-white to-indigo-100">
17
+
18
+ <!-- Header -->
19
+ <header class="fixed inset-x-0 top-0 z-50 bg-white/70 backdrop-blur-md shadow-sm">
20
+ <div class="max-w-7xl mx-auto px-6 py-3 flex items-center justify-between">
21
+
22
+ <a href="/" class="text-xl sm:text-2xl font-bold tracking-tight text-indigo-600">
23
+ AskBot.AI
24
+ </a>
25
+
26
+ <nav class="hidden sm:flex items-center space-x-6 text-sm font-medium">
27
+ <a href="#chat" class="text-gray-600 hover:text-indigo-600 transition">Chat</a>
28
+ <a href="#features" class="text-gray-600 hover:text-indigo-600 transition">Features</a>
29
+ <a href="#tech" class="text-gray-600 hover:text-indigo-600 transition">Tech</a>
30
+ <a href="#connect" class="text-gray-600 hover:text-indigo-600 transition">Connect</a>
31
+ </nav>
32
+
33
+ <button class="sm:hidden inline-flex items-center justify-center h-8 w-8 text-gray-600 hover:text-indigo-600">
34
+ <svg fill="none" viewBox="0 0 24 24" stroke="currentColor">
35
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 6h16M4 12h16M4 18h16" />
36
+ </svg>
37
+ </button>
38
+ </div>
39
+ </header>
40
+ <!-- /Header -->
41
+
42
+
43
+ <!-- <main class=""> -->
44
+ <section>
45
+ <div class="bg-white">
46
+ <!-- <div class="relative isolate overflow-hidden bg-gradient-to-b from-gray-200/50"> -->
47
+ <div class="mx-auto max-w-7xl pb-16 pt-4 sm:pb-24 lg:grid lg:grid-cols-2 lg:gap-x-8 lg:px-8 lg:py-10">
48
+ <div class="px-6 lg:px-0 lg:pt-4">
49
+ <div class="mx-auto max-w-2xl">
50
+ <div class="max-w-lg">
51
+ <h1 class="mt-10 text-4xl font-bold tracking-tight text-gray-900 sm:text-5xl">Upload PDFs, <br>
52
+ Research Papers & <br> Ask me Anything!!!</h1>
53
+ <p class="mt-6 text-lg leading-8 text-gray-600">My chatbot reads your research paper
54
+ or any PDFs and answers questions using AI model.</p>
55
+ </div>
56
+ </div>
57
+ </div>
58
+
59
+ <div class="mt-20 lg:mt-0 flex justify-center items-center">
60
+ <img
61
+ src="https://res.cloudinary.com/dekujzz4s/image/upload/v1751450446/giphy_sab0e8.webp"
62
+ class="drop-shadow-2xl" height="350" width="550" alt="chatbot image">
63
+ </div>
64
+ </div>
65
+ </div>
66
+ </div>
67
+ </section>
68
+
69
+ <!-- Chat Section -->
70
+ <section id="chat" class="py-20">
71
+ <div class="max-w-6xl mx-auto px-6">
72
+
73
+ <div class="text-center mb-10">
74
+ <h2 class="text-3xl font-bold text-gray-800">Chat With Your PDF</h2>
75
+ <p class="mt-2 text-gray-600 text-sm">Upload a research paper and ask any question. Powered by AI.</p>
76
+ </div>
77
+
78
+ <div id="uploadChatCard" class="bg-white shadow-xl border border-gray-200 rounded-xl p-6 space-y-6">
79
+ <div id="uploader" class="space-y-4 text-center">
80
+ <div class="flex items-center justify-center gap-4 flex-col sm:flex-row">
81
+ <label for="pdfFile"
82
+ class="cursor-pointer bg-indigo-500 hover:bg-indigo-600 text-white px-6 py-2 rounded shadow text-sm transition">
83
+ Choose PDF
84
+ </label>
85
+ <span id="fileNameDisplayText" class="text-sm text-gray-600 italic">No file chosen</span>
86
+ <input id="pdfFile" type="file" accept=".pdf" class="hidden">
87
+ </div>
88
+
89
+ <button id="btnUpload" disabled class="bg-indigo-500 hover:bg-indigo-600 text-white px-6 py-2 rounded">
90
+ Upload PDF
91
+ </button>
92
+ </div>
93
+
94
+ <!-- Chat Interface -->
95
+ <div id="chatInterface" class="hidden space-y-4">
96
+ <div class="flex justify-between items-center max-w-3xl mx-auto">
97
+ <div id="fileNameDisplay" class="text-sm text-gray-600 font-medium italic"></div>
98
+ <button id="reuploadBtn" class="text-sm text-indigo-600 hover:underline">
99
+ Upload Another PDF
100
+ </button>
101
+ </div>
102
+
103
+ <!-- Chat Box -->
104
+ <div id="chatBox"
105
+ class="space-y-3 max-w-3xl max-h-96 overflow-y-auto px-4 py-4 border rounded-lg bg-white shadow-inner mx-auto">
106
+ </div>
107
+
108
+ <form id="chatForm" class="flex max-w-3xl mx-auto">
109
+ <input id="msgInput" autocomplete="off" placeholder="Ask a question..."
110
+ class="flex-1 rounded-l-lg border-gray-300 px-4 py-3 shadow" />
111
+ <button class="bg-indigo-500 hover:bg-indigo-600 text-white px-6 rounded-r-lg">
112
+ Send
113
+ </button>
114
+ </form>
115
+ </div>
116
+ </div>
117
+
118
+ <!-- Result -->
119
+ {% if results %}
120
+ <div class="mt-6 bg-white p-6 rounded-lg shadow border border-gray-200">
121
+ <h2 class="text-lg font-semibold text-gray-800 mb-2">Answer:</h2>
122
+ <p class="text-gray-700 whitespace-pre-wrap">{{ results }}</p>
123
+ </div>
124
+ {% endif %}
125
+
126
+ </div>
127
+ </section>
128
+
129
+ <!-- Features Section -->
130
+ <section id="features" class="py-20">
131
+ <div class="max-w-7xl mx-auto px-6">
132
+
133
+ <div class="text-center mb-12">
134
+ <h2 class="text-3xl font-bold text-gray-800">Features</h2>
135
+ <p class="mt-2 text-gray-600 text-sm">What makes our PDF Chatbot stand out</p>
136
+ </div>
137
+
138
+ <div class="grid grid-cols-1 md:grid-cols-3 gap-8">
139
+
140
+ <!-- Instant PDF Reading -->
141
+ <div
142
+ class="bg-white bg-opacity-80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
143
+ <img src="https://res.cloudinary.com/dekujzz4s/image/upload/v1751357240/feature_img_1_nhlopv.jpg"
144
+ alt="Feature 1" class="mx-auto h-28 mb-4 object-contain" />
145
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">Instant PDF Understanding</h3>
146
+ <p class="text-sm text-gray-600">Ask any question and get accurate responses
147
+ based on the uploaded PDF content in seconds.</p>
148
+ </div>
149
+
150
+ <!-- Multiple File-Type Support -->
151
+ <div
152
+ class="bg-white bg-opacity-80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
153
+ <img src="https://res.cloudinary.com/dekujzz4s/image/upload/v1751357240/feature_img_2_b8feaa.jpg"
154
+ alt="Feature 2" class="mx-auto h-28 mb-4 object-contain" />
155
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">Multiple File-Type Support</h3>
156
+ <p class="text-sm text-gray-600">Upload research papers, eBooks, or reports —
157
+ the chatbot can handle various types of academic PDFs.</p>
158
+ </div>
159
+
160
+ <!-- AI-Powered Accuracy -->
161
+ <div
162
+ class="bg-white bg-opacity-80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
163
+ <img src="https://res.cloudinary.com/dekujzz4s/image/upload/v1751357580/feature_img_3_qeecda.jpg"
164
+ alt="Feature 3" class="mx-auto h-28 mb-4 object-contain" />
165
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">AI-Powered Accuracy</h3>
166
+ <p class="text-sm text-gray-600">Built with powerful AI models to understand complex contexts, summaries,
167
+ and
168
+ in-depth queries.</p>
169
+ </div>
170
+
171
+ </div>
172
+ </div>
173
+ </section>
174
+
175
+
176
+ <!-- ========== Tech Stack & AI Models ========== -->
177
+ <section id="tech" class="py-20">
178
+ <div class="max-w-7xl mx-auto px-6">
179
+ <div class="text-center mb-12">
180
+ <h2 class="text-3xl font-bold text-gray-800">Tech Stack &amp; AI Models</h2>
181
+ <p class="mt-2 text-gray-600 text-sm">
182
+ The key Technologies and AI models that power this chatbot
183
+ </p>
184
+ </div>
185
+
186
+ <!-- Python and Flask -->
187
+ <div class="grid grid-cols-1 md:grid-cols-3 gap-8">
188
+ <div
189
+ class="bg-white/80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
190
+ <img src="https://cdn-icons-png.flaticon.com/512/5968/5968350.png" alt="Python Flask"
191
+ class="mx-auto h-20 mb-4 object-contain" />
192
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">Python & Flask</h3>
193
+ <p class="text-sm text-gray-600">Handles backend, file uploads & AI Responses.</p>
194
+ </div>
195
+
196
+ <!-- LangChain -->
197
+ <div
198
+ class="bg-white/80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
199
+ <img src="https://res.cloudinary.com/dekujzz4s/image/upload/v1751439498/download_wabh02.png" alt="LangChain"
200
+ class="mx-auto h-20 mb-4 object-contain" />
201
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">LangChain</h3>
202
+ <p class="text-sm text-gray-600">LangChain manages PDF processing & LLM calls.</p>
203
+ </div>
204
+
205
+ <!-- Embeddings -->
206
+ <div
207
+ class="bg-white/80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
208
+ <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="MiniLM Embeddings"
209
+ class="mx-auto h-20 mb-4 object-contain" />
210
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">Embedding Model: all-MiniLM-L6-v2 </h3>
211
+ <p class="text-sm text-gray-600">Converts Text into Numerical Vectors.</p>
212
+ </div>
213
+
214
+ <!-- FAISS -->
215
+ <div
216
+ class="bg-white/80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
217
+ <img src="https://res.cloudinary.com/dekujzz4s/image/upload/v1751439632/download_so6va6.png" alt="FAISS"
218
+ class="mx-auto h-20 mb-4 object-contain" />
219
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">FAISS Vector Store</h3>
220
+ <p class="text-sm text-gray-600">Enables Fast Similarity Search of vectors in PDFs.</p>
221
+ </div>
222
+
223
+
224
+ <!-- LLM -->
225
+ <div
226
+ class="bg-white/80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
227
+ <img src="https://res.cloudinary.com/dekujzz4s/image/upload/v1751520819/download_cbh1yx.jpg" alt="Mixtral LLM"
228
+ class="mx-auto h-20 mb-4 object-contain" />
229
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">LLM: llama-3.1-8b-instant</h3>
230
+ <p class="text-sm text-gray-600">Generates correct answers from extracted PDF chunks.</p>
231
+ </div>
232
+
233
+ <!-- Tailwind CSS -->
234
+ <div
235
+ class="bg-white/80 backdrop-blur-md border border-gray-200 rounded-xl p-6 shadow-lg text-center transition hover:shadow-xl">
236
+ <img src="https://res.cloudinary.com/dekujzz4s/image/upload/v1751439818/download_qgtwpn.png"
237
+ alt="Tailwind CSS" class="mx-auto h-20 mb-4 object-contain" />
238
+ <h3 class="text-xl font-semibold text-gray-800 mb-2">Tailwind CSS</h3>
239
+ <p class="text-sm text-gray-600">Stylish and fully responsive frontend.</p>
240
+ </div>
241
+ </div>
242
+ </div>
243
+ </section>
244
+ <!-- </main> -->
245
+ </div>
246
+
247
+ <!-- ========== Connect / Footer Section ========== -->
248
+ <footer id="connect" class="w-full bg-gray-900 text-gray-300">
249
+ <div class="max-w-7xl mx-auto px-6 py-8 text-center">
250
+
251
+ <h2 class="text-3xl font-bold text-white mb-2">Connect With Me</h2>
252
+ <p class="text-gray-400 mb-8">Feel free to reach out on any platform!</p>
253
+
254
+ <div class="flex justify-center space-x-8 text-2xl">
255
+ <!-- LinkedIn -->
256
+ <a href="https://www.linkedin.com/in/om-more-b802b2281/" target="_blank" rel="noopener noreferrer"
257
+ class="hover:text-blue-400 transition">
258
+ <i class="fa-brands fa-linkedin"></i>
259
+ </a>
260
+
261
+ <!-- GitHub -->
262
+ <a href="https://github.com/ommore86/" target="_blank" rel="noopener noreferrer"
263
+ class="hover:text-white transition">
264
+ <i class="fa-brands fa-github"></i>
265
+ </a>
266
+
267
+ <!-- Email -->
268
+ <a href="mailto:omgmore2005@gmail.com" class="hover:text-red-400 transition">
269
+ <i class="fa-solid fa-envelope"></i>
270
+ </a>
271
+ </div>
272
+
273
+ <div class="mt-10 border-t border-gray-700 pt-6 text-sm text-gray-500">
274
+ &copy; 2025 Om More | All rights reserved
275
+ </div>
276
+ </footer>
277
+
278
+ <script>
279
+ let fileId = null;
280
+
281
+ const pdfInput = document.getElementById("pdfFile");
282
+ const btnUpload = document.getElementById("btnUpload");
283
+ pdfInput.onchange = () => {
284
+ const file = pdfInput.files[0];
285
+ if (file) {
286
+ document.getElementById("fileNameDisplayText").textContent = file.name;
287
+ btnUpload.disabled = false;
288
+ } else {
289
+ document.getElementById("fileNameDisplayText").textContent = "No file chosen";
290
+ btnUpload.disabled = true;
291
+ }
292
+ };
293
+
294
+ btnUpload.onclick = async () => {
295
+ const file = pdfInput.files[0];
296
+ const formData = new FormData();
297
+ formData.append("file", file);
298
+ btnUpload.disabled = true;
299
+ btnUpload.textContent = "Uploading…";
300
+
301
+ const res = await fetch("/upload", { method: "POST", body: formData });
302
+ const data = await res.json();
303
+ if (res.ok) {
304
+ fileId = data.file_id;
305
+ document.getElementById("fileNameDisplay").textContent = pdfInput.files[0].name;
306
+
307
+ document.getElementById("uploader").classList.add("hidden");
308
+ document.getElementById("chatInterface").classList.remove("hidden");
309
+ document.getElementById("fileNameDisplay").textContent = `📄 ${file.name}`;
310
+ } else {
311
+ alert(data.error || "Upload failed");
312
+ btnUpload.disabled = false;
313
+ btnUpload.textContent = "Upload PDF";
314
+ }
315
+ };
316
+
317
+
318
+ // Chat logic
319
+ const chatBox = document.getElementById("chatBox");
320
+ const chatForm = document.getElementById("chatForm");
321
+ const msgInput = document.getElementById("msgInput");
322
+
323
+ chatForm.addEventListener("submit", async e => {
324
+ e.preventDefault();
325
+ const q = msgInput.value.trim();
326
+ if (!q) return;
327
+ appendBubble(q, "user");
328
+ msgInput.value = "";
329
+
330
+ // Showing bubble while waiting
331
+ const thinkingBubble = addThinkingBubble();
332
+
333
+ let answerText = "";
334
+ try {
335
+ const res = await fetch("/ask", {
336
+ method: "POST",
337
+ headers: { "Content-Type": "application/json" },
338
+ body: JSON.stringify({ question: q, file_id: fileId }),
339
+ });
340
+ const data = await res.json();
341
+ answerText = data.answer || data.error;
342
+ } catch (err) {
343
+ answerText = "Error fetching answer.";
344
+ }
345
+
346
+ // Replacing placeholder text
347
+ thinkingBubble.textContent = answerText;
348
+ });
349
+
350
+ function appendBubble(text, who) {
351
+ const bubble = document.createElement("div");
352
+ bubble.className = `w-fit rounded-2xl px-5 py-3 shadow break-words
353
+ ${who === "user"
354
+ ? "ml-auto bg-indigo-500 text-white"
355
+ : "mr-auto bg-gray-200 text-gray-900"}`;
356
+ bubble.textContent = text;
357
+ chatBox.appendChild(bubble);
358
+ chatBox.scrollTop = chatBox.scrollHeight;
359
+ return bubble;
360
+ }
361
+
362
+ function addThinkingBubble() {
363
+ const bubble = document.createElement("div");
364
+ bubble.className = "w-fit rounded-2xl px-5 py-3 shadow mr-auto bg-gray-200 text-gray-900";
365
+ bubble.innerHTML = `<span class="animate-pulse">Thinking…</span>`;
366
+ chatBox.appendChild(bubble);
367
+ chatBox.scrollTop = chatBox.scrollHeight;
368
+ return bubble;
369
+ }
370
+
371
+ document.getElementById("reuploadBtn").onclick = () => {
372
+ fileId = null;
373
+ pdfInput.value = "";
374
+
375
+ document.getElementById("uploader").classList.remove("hidden");
376
+ document.getElementById("chatInterface").classList.add("hidden");
377
+
378
+ document.getElementById("fileNameDisplay").textContent = "";
379
+ document.getElementById("chatBox").innerHTML = "";
380
+
381
+ btnUpload.disabled = true;
382
+ btnUpload.textContent = "Upload PDF";
383
+ };
384
+ </script>
385
+
386
+ </body>
387
+
388
+ </html>
templates/site.webmanifest ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}
uploads/.huggingface.yml ADDED
@@ -0,0 +1 @@
 
 
1
+ sdk: "docker"