Arif committed on
Commit
9a62b2a
·
1 Parent(s): 704b133

Huggingface upload

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -0
  2. app.py +12 -0
  3. app/static/index.html +70 -0
  4. requirements.txt +519 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image for the Hugging Face Space: installs the pinned Python dependencies
# and serves the FastAPI app with uvicorn on port 7860.
FROM python:3.10-slim

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# up front and give it ownership of the working directory so the app has a
# writable home and project directory at runtime.
RUN useradd -m -u 1000 user \
    && mkdir -p /code \
    && chown user:user /code

WORKDIR /code

# Copy only the requirements first so the (large) dependency layer is cached
# and is not rebuilt when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code, owned by the runtime user.
COPY --chown=user:user . .

# Drop root for runtime. HOME is set explicitly so libraries that cache under
# the home directory have a writable location.
# NOTE(review): presumably needed for model downloads (sentence-transformers
# is a pinned dependency) - confirm against the backend code.
USER user
ENV HOME=/home/user

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
"""ASGI entry point: mounts the backend API under /api and the static frontend at /.

The Dockerfile starts the server with ``uvicorn app:app``, so this module must
expose the combined application under the name ``app``.
"""
from pathlib import Path

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles

# NOTE(review): this module is named ``app.py`` and imports from a package that
# is also named ``app``. Depending on whether ``app/__init__.py`` exists, either
# this import or the ``app:app`` uvicorn target may resolve to the wrong object.
# Confirm against the repository layout; renaming this file would remove the
# ambiguity.
from app.main import app as backend_app

# The frontend is shipped at app/static/index.html. Resolve it relative to this
# file instead of the process working directory: StaticFiles raises at start-up
# when the directory does not exist, and a bare "static" only works if the
# server happens to be launched from inside app/.
STATIC_DIR = Path(__file__).resolve().parent / "app" / "static"

main_app = FastAPI()

# Backend API at /api
main_app.mount("/api", backend_app)
# Frontend served at /. Mounted last: routes are matched in registration order,
# so a catch-all mount at "/" placed first would shadow /api.
main_app.mount("/", StaticFiles(directory=STATIC_DIR, html=True), name="static")

app = main_app  # name referenced by the uvicorn "app:app" target
app/static/index.html ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>RAG Portfolio Project Q&amp;A</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <style>
    body { margin:0; font-family:system-ui,sans-serif; background: #f6f8fc; }
    .container { max-width: 500px; margin:40px auto; background:#fff; border-radius:18px; padding:36px 28px 30px 28px; box-shadow:0 8px 36px #0001;}
    h1 { color:#466bb7; font-size:2rem; margin-bottom: 8px;}
    .subtitle { color: #686868; margin-bottom:20px; font-size:1.05rem;}
    textarea { width:100%; min-height:70px; font-size:1rem; border-radius:7px; border:1.5px solid #dbeafe; padding:10px;}
    button { margin:10px 0 0 0; padding:12px 24px; border:none; border-radius:8px; background:#466bb7; color:#fff; font-size:1.08em; font-weight:500; cursor:pointer;}
    #answer { background:#f3f4fa; border-left:5px solid #466bb7; margin-top:24px; padding:19px; border-radius:8px 7px 7px 8px;}
    .source { font-size:0.92em; color:#666; margin-top:7px; }
    #loading { margin:20px 0; color: #466bb7; font-weight: bold;}
    #error { color: #d00; margin: 12px 0;}
    footer { margin:32px auto 0; max-width:500px; font-size:0.92em; text-align:center; color:#aaa; }
    @media (max-width:600px) {.container {padding:18px 5vw 20px 5vw;} }
  </style>
</head>
<body>
  <div class="container">
    <h1>RAG Demo: Document Q&amp;A</h1>
    <div class="subtitle">Ask any question about your uploaded documents.</div>
    <form id="form">
      <textarea id="question" placeholder="E.g. What is deep learning?" required maxlength="300"></textarea>
      <button type="submit">Ask AI</button>
    </form>
    <div id="loading" style="display:none;">Thinking...</div>
    <div id="error"></div>
    <div id="answer"></div>
  </div>
  <footer>
    <b>Open-source RAG Portfolio Project</b> &nbsp;|&nbsp; <a href="https://github.com/YOUR_USERNAME/rag-portfolio-project" target="_blank" rel="noopener noreferrer">GitHub</a>
  </footer>
  <script>
    // Same-origin endpoint: the backend is mounted under /api by app.py.
    // Replace with an absolute URL only if the API is hosted elsewhere.
    const API_URL = "/api/query";

    const form = document.getElementById("form");
    const questionEl = document.getElementById("question");
    const loadingEl = document.getElementById("loading");
    const errorEl = document.getElementById("error");
    const answerEl = document.getElementById("answer");

    // Turn the `detail` field of an error response into display text.
    // It is not always a string, so anything else is serialized instead of
    // being shown as "[object Object]".
    function describeDetail(detail) {
      if (!detail) return "";
      return typeof detail === "string" ? detail : JSON.stringify(detail);
    }

    // Render the answer and its sources using text nodes only. The answer and
    // source names originate from documents/model output, so they must never
    // be interpreted as HTML.
    function renderAnswer(data) {
      const label = document.createElement("b");
      label.textContent = "Answer:";
      answerEl.replaceChildren(
        label,
        document.createElement("br"),
        document.createTextNode(data.answer || "No answer."),
        document.createElement("br")
      );
      const sources = Array.isArray(data.sources) ? data.sources : [];
      sources.forEach((s, i) => {
        const row = document.createElement("div");
        row.className = "source";
        row.textContent =
          `Source ${i + 1}: ${s.source} (chunk ${s.chunk_index}, relevance: ${Math.round(s.score * 100)}%)`;
        answerEl.appendChild(row);
      });
    }

    form.onsubmit = async (e) => {
      e.preventDefault();
      errorEl.textContent = "";
      answerEl.replaceChildren();

      // Validate before showing the spinner; a whitespace-only question passes
      // the `required` check but must not leave "Thinking..." on screen.
      const q = questionEl.value.trim();
      if (!q) return;

      loadingEl.style.display = "block";
      let data;
      try {
        const r = await fetch(API_URL, {
          method: "POST",
          headers: {"Content-Type": "application/json"},
          body: JSON.stringify({question: q, top_k: 5}),
        });
        // Error pages from a proxy may not be JSON; don't let the parse
        // failure hide the HTTP status.
        data = await r.json().catch(() => null);
        if (!r.ok) throw new Error("Server error: " + (describeDetail(data?.detail) || r.status));
        if (!data) throw new Error("Invalid response from server");
      } catch (err) {
        errorEl.textContent = "Error: " + (err.message || "backend unavailable");
        return;
      } finally {
        // Runs on success, failure and early return alike.
        loadingEl.style.display = "none";
      }
      renderAnswer(data);
    };
  </script>
</body>
</html>
requirements.txt ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile --output-file requirements.txt pyproject.toml
3
+ aiofiles==25.1.0
4
+ # via unstructured-client
5
+ aiohappyeyeballs==2.6.1
6
+ # via aiohttp
7
+ aiohttp==3.13.1
8
+ # via
9
+ # fsspec
10
+ # instructor
11
+ # langchain-community
12
+ aiosignal==1.4.0
13
+ # via aiohttp
14
+ annotated-doc==0.0.3
15
+ # via fastapi
16
+ annotated-types==0.7.0
17
+ # via pydantic
18
+ anyio==4.11.0
19
+ # via
20
+ # httpx
21
+ # openai
22
+ # starlette
23
+ appdirs==1.4.4
24
+ # via ragas
25
+ async-timeout==4.0.3
26
+ # via
27
+ # aiohttp
28
+ # langchain-classic
29
+ attrs==25.4.0
30
+ # via aiohttp
31
+ backoff==2.2.1
32
+ # via unstructured
33
+ beautifulsoup4==4.14.2
34
+ # via unstructured
35
+ certifi==2025.10.5
36
+ # via
37
+ # httpcore
38
+ # httpx
39
+ # requests
40
+ cffi==2.0.0
41
+ # via cryptography
42
+ cfgv==3.4.0
43
+ # via pre-commit
44
+ charset-normalizer==3.4.4
45
+ # via
46
+ # requests
47
+ # unstructured
48
+ click==8.3.0
49
+ # via
50
+ # nltk
51
+ # python-oxmsg
52
+ # typer
53
+ # uvicorn
54
+ cryptography==46.0.3
55
+ # via unstructured-client
56
+ dataclasses-json==0.6.7
57
+ # via
58
+ # langchain-community
59
+ # unstructured
60
+ datasets==4.3.0
61
+ # via ragas
62
+ dill==0.4.0
63
+ # via
64
+ # datasets
65
+ # multiprocess
66
+ diskcache==5.6.3
67
+ # via
68
+ # instructor
69
+ # ragas
70
+ distlib==0.4.0
71
+ # via virtualenv
72
+ distro==1.9.0
73
+ # via openai
74
+ docstring-parser==0.17.0
75
+ # via instructor
76
+ emoji==2.15.0
77
+ # via unstructured
78
+ exceptiongroup==1.3.0
79
+ # via anyio
80
+ fastapi==0.120.1
81
+ # via generative-ai-portfolio-project (pyproject.toml)
82
+ filelock==3.20.0
83
+ # via
84
+ # datasets
85
+ # huggingface-hub
86
+ # torch
87
+ # transformers
88
+ # virtualenv
89
+ filetype==1.2.0
90
+ # via unstructured
91
+ frozenlist==1.8.0
92
+ # via
93
+ # aiohttp
94
+ # aiosignal
95
+ fsspec==2025.9.0
96
+ # via
97
+ # datasets
98
+ # huggingface-hub
99
+ # torch
100
+ gitdb==4.0.12
101
+ # via gitpython
102
+ gitpython==3.1.45
103
+ # via ragas
104
+ grpcio==1.76.0
105
+ # via qdrant-client
106
+ h11==0.16.0
107
+ # via
108
+ # httpcore
109
+ # uvicorn
110
+ h2==4.3.0
111
+ # via httpx
112
+ hf-xet==1.2.0
113
+ # via huggingface-hub
114
+ hpack==4.1.0
115
+ # via h2
116
+ html5lib==1.1
117
+ # via unstructured
118
+ httpcore==1.0.9
119
+ # via
120
+ # httpx
121
+ # unstructured-client
122
+ httpx==0.28.1
123
+ # via
124
+ # datasets
125
+ # langgraph-sdk
126
+ # langsmith
127
+ # ollama
128
+ # openai
129
+ # qdrant-client
130
+ # unstructured-client
131
+ httpx-sse==0.4.3
132
+ # via langchain-community
133
+ huggingface-hub==0.36.0
134
+ # via
135
+ # datasets
136
+ # sentence-transformers
137
+ # tokenizers
138
+ # transformers
139
+ hyperframe==6.1.0
140
+ # via h2
141
+ identify==2.6.15
142
+ # via pre-commit
143
+ idna==3.11
144
+ # via
145
+ # anyio
146
+ # httpx
147
+ # requests
148
+ # yarl
149
+ instructor==1.12.0
150
+ # via ragas
151
+ jinja2==3.1.6
152
+ # via
153
+ # instructor
154
+ # torch
155
+ jiter==0.10.0
156
+ # via
157
+ # instructor
158
+ # openai
159
+ joblib==1.5.2
160
+ # via
161
+ # nltk
162
+ # scikit-learn
163
+ jsonpatch==1.33
164
+ # via langchain-core
165
+ jsonpointer==3.0.0
166
+ # via jsonpatch
167
+ langchain==1.0.2
168
+ # via
169
+ # generative-ai-portfolio-project (pyproject.toml)
170
+ # ragas
171
+ langchain-classic==1.0.0
172
+ # via langchain-community
173
+ langchain-community==0.4.1
174
+ # via
175
+ # generative-ai-portfolio-project (pyproject.toml)
176
+ # ragas
177
+ langchain-core==1.0.1
178
+ # via
179
+ # langchain
180
+ # langchain-classic
181
+ # langchain-community
182
+ # langchain-ollama
183
+ # langchain-openai
184
+ # langchain-text-splitters
185
+ # langgraph
186
+ # langgraph-checkpoint
187
+ # langgraph-prebuilt
188
+ # ragas
189
+ langchain-ollama==1.0.0
190
+ # via generative-ai-portfolio-project (pyproject.toml)
191
+ langchain-openai==1.0.1
192
+ # via ragas
193
+ langchain-text-splitters==1.0.0
194
+ # via
195
+ # generative-ai-portfolio-project (pyproject.toml)
196
+ # langchain-classic
197
+ langdetect==1.0.9
198
+ # via unstructured
199
+ langgraph==1.0.1
200
+ # via langchain
201
+ langgraph-checkpoint==3.0.0
202
+ # via
203
+ # langgraph
204
+ # langgraph-prebuilt
205
+ langgraph-prebuilt==1.0.1
206
+ # via langgraph
207
+ langgraph-sdk==0.2.9
208
+ # via langgraph
209
+ langsmith==0.4.38
210
+ # via
211
+ # langchain-classic
212
+ # langchain-community
213
+ # langchain-core
214
+ lxml==6.0.2
215
+ # via
216
+ # python-docx
217
+ # unstructured
218
+ markdown-it-py==4.0.0
219
+ # via rich
220
+ markupsafe==3.0.3
221
+ # via jinja2
222
+ marshmallow==3.26.1
223
+ # via dataclasses-json
224
+ mdurl==0.1.2
225
+ # via markdown-it-py
226
+ mpmath==1.3.0
227
+ # via sympy
228
+ multidict==6.7.0
229
+ # via
230
+ # aiohttp
231
+ # yarl
232
+ multiprocess==0.70.16
233
+ # via datasets
234
+ mypy-extensions==1.1.0
235
+ # via typing-inspect
236
+ nest-asyncio==1.6.0
237
+ # via ragas
238
+ networkx==3.4.2
239
+ # via
240
+ # ragas
241
+ # torch
242
+ nltk==3.9.2
243
+ # via unstructured
244
+ nodeenv==1.9.1
245
+ # via pre-commit
246
+ numpy==2.2.6
247
+ # via
248
+ # datasets
249
+ # langchain-community
250
+ # pandas
251
+ # qdrant-client
252
+ # ragas
253
+ # rank-bm25
254
+ # scikit-learn
255
+ # scikit-network
256
+ # scipy
257
+ # transformers
258
+ # unstructured
259
+ olefile==0.47
260
+ # via python-oxmsg
261
+ ollama==0.6.0
262
+ # via langchain-ollama
263
+ openai==1.109.1
264
+ # via
265
+ # instructor
266
+ # langchain-openai
267
+ # ragas
268
+ orjson==3.11.4
269
+ # via
270
+ # langgraph-sdk
271
+ # langsmith
272
+ ormsgpack==1.11.0
273
+ # via langgraph-checkpoint
274
+ packaging==25.0
275
+ # via
276
+ # datasets
277
+ # huggingface-hub
278
+ # langchain-core
279
+ # langsmith
280
+ # marshmallow
281
+ # transformers
282
+ pandas==2.3.3
283
+ # via datasets
284
+ pillow==12.0.0
285
+ # via
286
+ # ragas
287
+ # sentence-transformers
288
+ platformdirs==4.5.0
289
+ # via virtualenv
290
+ portalocker==3.2.0
291
+ # via qdrant-client
292
+ pre-commit==4.3.0
293
+ # via instructor
294
+ propcache==0.4.1
295
+ # via
296
+ # aiohttp
297
+ # yarl
298
+ protobuf==6.33.0
299
+ # via qdrant-client
300
+ psutil==7.1.2
301
+ # via unstructured
302
+ pyarrow==22.0.0
303
+ # via datasets
304
+ pycparser==2.23
305
+ # via cffi
306
+ pydantic==2.12.3
307
+ # via
308
+ # fastapi
309
+ # instructor
310
+ # langchain
311
+ # langchain-classic
312
+ # langchain-core
313
+ # langgraph
314
+ # langsmith
315
+ # ollama
316
+ # openai
317
+ # pydantic-settings
318
+ # qdrant-client
319
+ # ragas
320
+ # unstructured-client
321
+ pydantic-core==2.41.4
322
+ # via
323
+ # instructor
324
+ # pydantic
325
+ pydantic-settings==2.11.0
326
+ # via langchain-community
327
+ pygments==2.19.2
328
+ # via rich
329
+ pypdf==6.1.3
330
+ # via
331
+ # generative-ai-portfolio-project (pyproject.toml)
332
+ # unstructured-client
333
+ python-dateutil==2.9.0.post0
334
+ # via pandas
335
+ python-docx==1.2.0
336
+ # via generative-ai-portfolio-project (pyproject.toml)
337
+ python-dotenv==1.2.1
338
+ # via pydantic-settings
339
+ python-iso639==2025.2.18
340
+ # via unstructured
341
+ python-magic==0.4.27
342
+ # via unstructured
343
+ python-multipart==0.0.20
344
+ # via generative-ai-portfolio-project (pyproject.toml)
345
+ python-oxmsg==0.0.2
346
+ # via unstructured
347
+ pytz==2025.2
348
+ # via pandas
349
+ pyyaml==6.0.3
350
+ # via
351
+ # datasets
352
+ # huggingface-hub
353
+ # langchain-classic
354
+ # langchain-community
355
+ # langchain-core
356
+ # pre-commit
357
+ # transformers
358
+ qdrant-client==1.15.1
359
+ # via generative-ai-portfolio-project (pyproject.toml)
360
+ ragas==0.3.7
361
+ # via generative-ai-portfolio-project (pyproject.toml)
362
+ rank-bm25==0.2.2
363
+ # via generative-ai-portfolio-project (pyproject.toml)
364
+ rapidfuzz==3.14.1
365
+ # via unstructured
366
+ regex==2025.10.23
367
+ # via
368
+ # nltk
369
+ # tiktoken
370
+ # transformers
371
+ requests==2.32.5
372
+ # via
373
+ # generative-ai-portfolio-project (pyproject.toml)
374
+ # datasets
375
+ # huggingface-hub
376
+ # instructor
377
+ # langchain-classic
378
+ # langchain-community
379
+ # langsmith
380
+ # requests-toolbelt
381
+ # tiktoken
382
+ # transformers
383
+ # unstructured
384
+ requests-toolbelt==1.0.0
385
+ # via
386
+ # langsmith
387
+ # unstructured-client
388
+ rich==14.2.0
389
+ # via
390
+ # instructor
391
+ # ragas
392
+ # typer
393
+ safetensors==0.6.2
394
+ # via transformers
395
+ scikit-learn==1.7.2
396
+ # via sentence-transformers
397
+ scikit-network==0.33.3
398
+ # via ragas
399
+ scipy==1.15.3
400
+ # via
401
+ # scikit-learn
402
+ # scikit-network
403
+ # sentence-transformers
404
+ sentence-transformers==5.1.2
405
+ # via generative-ai-portfolio-project (pyproject.toml)
406
+ shellingham==1.5.4
407
+ # via typer
408
+ six==1.17.0
409
+ # via
410
+ # html5lib
411
+ # langdetect
412
+ # python-dateutil
413
+ smmap==5.0.2
414
+ # via gitdb
415
+ sniffio==1.3.1
416
+ # via
417
+ # anyio
418
+ # openai
419
+ soupsieve==2.8
420
+ # via beautifulsoup4
421
+ sqlalchemy==2.0.44
422
+ # via
423
+ # langchain-classic
424
+ # langchain-community
425
+ starlette==0.48.0
426
+ # via fastapi
427
+ sympy==1.14.0
428
+ # via torch
429
+ tenacity==9.1.2
430
+ # via
431
+ # instructor
432
+ # langchain-community
433
+ # langchain-core
434
+ threadpoolctl==3.6.0
435
+ # via scikit-learn
436
+ tiktoken==0.12.0
437
+ # via
438
+ # langchain-openai
439
+ # ragas
440
+ tokenizers==0.22.1
441
+ # via transformers
442
+ torch==2.9.0
443
+ # via sentence-transformers
444
+ tqdm==4.67.1
445
+ # via
446
+ # datasets
447
+ # huggingface-hub
448
+ # nltk
449
+ # openai
450
+ # ragas
451
+ # sentence-transformers
452
+ # transformers
453
+ # unstructured
454
+ transformers==4.57.1
455
+ # via sentence-transformers
456
+ typer==0.20.0
457
+ # via
458
+ # instructor
459
+ # ragas
460
+ typing-extensions==4.15.0
461
+ # via
462
+ # aiosignal
463
+ # anyio
464
+ # beautifulsoup4
465
+ # cryptography
466
+ # exceptiongroup
467
+ # fastapi
468
+ # grpcio
469
+ # huggingface-hub
470
+ # langchain-core
471
+ # multidict
472
+ # openai
473
+ # pydantic
474
+ # pydantic-core
475
+ # pypdf
476
+ # python-docx
477
+ # python-oxmsg
478
+ # sentence-transformers
479
+ # sqlalchemy
480
+ # starlette
481
+ # torch
482
+ # typer
483
+ # typing-inspect
484
+ # typing-inspection
485
+ # unstructured
486
+ # uvicorn
487
+ # virtualenv
488
+ typing-inspect==0.9.0
489
+ # via dataclasses-json
490
+ typing-inspection==0.4.2
491
+ # via
492
+ # pydantic
493
+ # pydantic-settings
494
+ tzdata==2025.2
495
+ # via pandas
496
+ unstructured==0.18.15
497
+ # via generative-ai-portfolio-project (pyproject.toml)
498
+ unstructured-client==0.42.3
499
+ # via unstructured
500
+ urllib3==2.5.0
501
+ # via
502
+ # qdrant-client
503
+ # requests
504
+ uvicorn==0.38.0
505
+ # via generative-ai-portfolio-project (pyproject.toml)
506
+ virtualenv==20.35.3
507
+ # via pre-commit
508
+ webencodings==0.5.1
509
+ # via html5lib
510
+ wrapt==2.0.0
511
+ # via unstructured
512
+ xxhash==3.6.0
513
+ # via
514
+ # datasets
515
+ # langgraph
516
+ yarl==1.22.0
517
+ # via aiohttp
518
+ zstandard==0.25.0
519
+ # via langsmith