MohitGupta41 committed on
Commit
4b9a655
·
1 Parent(s): a75e6eb

Initial Commit

Browse files
.gitattributes CHANGED
@@ -1,35 +1,4 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.py filter=lfs diff=lfs merge=lfs -text
2
+ *.ipynb filter=lfs diff=lfs merge=lfs -text
3
+ *.ttf filter=lfs diff=lfs merge=lfs -text
4
+ *.pdf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .venv
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="dark"
3
+ primaryColor="#3B82F6"
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ # System deps for ocrmypdf + fonts + runtime
4
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
5
+ ocrmypdf tesseract-ocr ghostscript qpdf pngquant unpaper \
6
+ fonts-noto fonts-noto-cjk fonts-noto-unhinted fonts-noto-color-emoji \
7
+ libglib2.0-0 libgl1 \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # app code
11
+ WORKDIR /app
12
+ COPY requirements.txt ./requirements.txt
13
+ RUN pip install --no-cache-dir -r requirements.txt
14
+
15
+ COPY . .
16
+
17
+ # streamlit port on Spaces is 7860 by convention
18
+ ENV PORT=7860
19
+ EXPOSE 7860
20
+
21
+ # Ensure output dirs exist
22
+ RUN mkdir -p output_pdfs temp
23
+
24
+ # Streamlit config
25
+ ENV STREAMLIT_SERVER_PORT=7860
26
+ ENV STREAMLIT_SERVER_HEADLESS=true
27
+ ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
28
+
29
+ CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
Notebook/notebook.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b187df0cfb55d0517d13ac484cbb920e229d292d54f7db2114220d143667ee04
3
+ size 262601
Notebook/notebook2.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8428c01520dce3e2098c0e0328dd69d8280ceeba6b46ee8e4fbd76211e79ed5a
3
+ size 399743
Notebook/notebook3.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b778339407913f774c9a9c5d58cd2045a1523cf61dc2499d9d1c384852a64a95
3
+ size 270908
Notebook/notebook4.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:796016ae706028cd1ee126978537a6b8100f202837078b975554cdbf74be2088
3
+ size 45168
Notebook/notebook5.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f29def08df2f8a8067fd246a4f73dcce05cae4606ad609e354e048926517e6f3
3
+ size 149852
Notebook/temp/ocr_fixed.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76985c22eabb5c76866b2a14a4affae1305380d7288b8c9a3cc6321514e56f81
3
+ size 8202753
PDF_Translate/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1386ed2d1dd2ef590be5ef2ea86cbff8456484e1a84c554fddbbb1f108fab526
3
+ size 24
PDF_Translate/cli.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06be90dd5253873a72b614b08d5eaaff8289e59be9d33f6c6c08d6cf2bcb3b1f
3
+ size 6237
PDF_Translate/constants.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:327964f0b072424f8ac46143b56c636d65ab148a4209ba6f53f524358144ef27
3
+ size 1106
PDF_Translate/hybrid.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf57e2185fafb85f9bb158d1c0283074826ca02c002b931d4bdabadd2aaec040
3
+ size 6063
PDF_Translate/ocr.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dceea223700e784f853e5ecca04031a0a8c32570ab9cf71687f090a47296047b
3
+ size 2343
PDF_Translate/overlay.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9aa45d7e145ea379fa141c28efccbfaaa08e04999e27ef4b3039ef2dab4cfd0
3
+ size 10788
PDF_Translate/pipeline.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5db8156e416cd4da5dd2e9b7dd831c90cc632ef53d20499081173c5e9443f30
3
+ size 18824
PDF_Translate/textlayer.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3970161c69a797e76c4a825a7d8414964ce6704db0ffac7d65a4e6327193e928
3
+ size 11925
PDF_Translate/utils.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:232895507f730581719776fec888d9f7f07bfccb64475340fe596835c64ca811
3
+ size 6267
app.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb01b0a35bdfc3534a56719c037bb90afaa0ecf35974bef57e7d87514f211f6c
3
+ size 12251
assets/fonts/Hind-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01de158022f53077b52303e46de3b0ab5fb245222a7ffe25a2a57fdd9e969162
3
+ size 299532
assets/fonts/Karma-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc3457d2867b67ee956d95206dbc831e57b26975219d3d4e68d60f51f67b4b55
3
+ size 339812
assets/fonts/Mukta-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2958e4af564507df2a856164df6f9978dacb03f999a4f34a0c269dc8a4de9688
3
+ size 432248
assets/fonts/NotoSans-Bold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c976e4b1b99edc88775377fcc21692ca4bfa46b6d6ca6522bfda505b28ff9d6a
3
+ size 575740
assets/fonts/NotoSans-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b85c38ecea8a7cfb39c24e395a4007474fa5a4fc864f6ee33309eb4948d232d5
3
+ size 569208
assets/fonts/NotoSansDevanagari-Bold.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19cc835a064c4af63e3c20feb54f5bf51dc25ffa52c0f493a23904572af8b26e
3
+ size 225748
assets/fonts/NotoSansDevanagari-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385e78e6359a9d88a0f243d53b1209d7548361ba2194e2b9ec779bcaa7e8949d
3
+ size 219212
assets/fonts/TiroDevanagariHindi-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2abcb4d352f0bfab91632df5d9c8173882073c182e662fa731a5a738e6681d7
3
+ size 423224
assets/samples/Test1.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b316e937344ff9c1d536d9809f803df6c1130f8cbb3a6102d5fed8b029492c
3
+ size 245658
assets/samples/Test1_translated.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f80f2a1e0dde378590b7f2387d8929fbe65c6e8dec8b59d6c9926a2df2030bd
3
+ size 448186
assets/samples/Test2.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cc71827279b3d4edfe6b3b2c4a67a01d58d3ab08cd9fe4eb256870c75fabf4
3
+ size 462464
assets/samples/Test2_translated.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9a2d58323263978b6f53d767642cfafc50003b9a81cc670799790328a341120
3
+ size 494623
assets/samples/Test3.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b43902250834482ca3bbbeb10547773a25f595de40262a004812a69b17e405a6
3
+ size 1251666
assets/samples/Test3_translated.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:939e0785b078e4830e2847ba32d984ca31bb521d45f1ebf3a3aeea4562b77dc7
3
+ size 650922
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.35.0
2
+ pymupdf>=1.24.0
3
+ pillow>=10.3.0
4
+ # pytesseract>=0.3.10
5
+
6
+ ocrmypdf
7
+ googletrans
8
+
9
+
10
+ streamlit==1.38.0
11
+ pymupdf==1.24.9
12
+ # googletrans==4.0.0rc1
13
+ # Pillow==10.4.0
14
+ # nest_asyncio==1.6.0