Sp2503 committed on
Commit
c4ff027
·
verified ·
1 Parent(s): 29f9811

Delete Muril-Model/Muril-Model

Browse files
Muril-Model/Muril-Model/.gitattributes DELETED
@@ -1,36 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- muril_multilingual_dataset.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/.gitignore DELETED
@@ -1,45 +0,0 @@
1
- # Python
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
- *.so
6
- .Python
7
- build/
8
- develop-eggs/
9
- dist/
10
- downloads/
11
- eggs/
12
- .eggs/
13
- lib/
14
- lib64/
15
- parts/
16
- sdist/
17
- var/
18
- wheels/
19
- *.egg-info/
20
- .installed.cfg
21
- *.egg
22
-
23
- # Virtual Environment
24
- venv/
25
- env/
26
- .env/
27
- .venv/
28
-
29
- # IDEs and Editors
30
- .idea/
31
- .vscode/
32
- *.swp
33
- *.swo
34
- *~
35
-
36
- # Jupyter Notebook
37
- .ipynb_checkpoints
38
-
39
- # Model files and data
40
- *.h5
41
- *.pkl
42
- *.model
43
- data/
44
- models/
45
- logs/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/Dockerfile DELETED
@@ -1,28 +0,0 @@
1
- # Lightweight Python image
2
- FROM python:3.10-slim
3
-
4
- # Disable CUDA & set cache
5
- ENV TORCH_DISABLE_CUDA=1
6
- ENV HF_HOME=/app/hf_cache
7
- ENV TRANSFORMERS_CACHE=/app/hf_cache
8
-
9
- # Working directory
10
- WORKDIR /app
11
-
12
- # Install dependencies
13
- COPY requirements.txt .
14
- RUN apt-get update && apt-get install -y git && \
15
- pip install --no-cache-dir -r requirements.txt && \
16
- rm -rf /var/lib/apt/lists/*
17
-
18
- # Copy app code and model/data
19
- COPY . .
20
-
21
- # Make cache folder writable
22
- RUN mkdir -p /app/hf_cache && chmod -R 777 /app/hf_cache
23
-
24
- # Expose port
25
- EXPOSE 8080
26
-
27
- # Run FastAPI via uvicorn
28
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/README.md DELETED
@@ -1,10 +0,0 @@
1
- ---
2
- title: Muril Model
3
- emoji: 🌖
4
- colorFrom: gray
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/answer_embeddings.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fa2e749567247c8a15144c6a0b1d3423ae8a8a0054aee9f3cc2774f8b9cb555
3
- size 83854959
 
 
 
 
Muril-Model/Muril-Model/main.py DELETED
@@ -1,70 +0,0 @@
1
- import os
2
- import torch
3
- import pandas as pd
4
- from fastapi import FastAPI
5
- from pydantic import BaseModel
6
- from sentence_transformers import SentenceTransformer, util
7
- from huggingface_hub import snapshot_download
8
-
9
# --- Cache Configuration ---
# NOTE: huggingface_hub and sentence_transformers are imported above this
# point, and huggingface_hub resolves its cache location from the environment
# at import time. These assignments are kept for any code that reads the
# environment later, but the hub download below is given its cache directory
# explicitly so it lands under HF_CACHE_DIR regardless of import order.
HF_CACHE_DIR = "/app/hf_cache"
os.environ["HF_HOME"] = HF_CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = HF_CACHE_DIR
# NOTE(review): not aware of PyTorch honoring this variable — the model is
# pinned to CPU explicitly below instead of relying on it. Verify and remove.
os.environ["TORCH_DISABLE_CUDA"] = "1"

# --- Hugging Face Repo ---
HF_REPO = "Sp2503/Muril-Model"

# --- Download model & embeddings from Hugging Face Hub ---
print("📦 Downloading model & embeddings from Hugging Face Hub...")
model_dir = snapshot_download(
    repo_id=HF_REPO,
    repo_type="model",
    # "<HF_HOME>/hub" is the hub's default cache layout, so this matches what
    # an HF_HOME set before interpreter start-up would produce.
    cache_dir=os.path.join(HF_CACHE_DIR, "hub"),
)
print(f"✅ Model snapshot available at: {model_dir}")

# NOTE(review): assumes the SentenceTransformer files sit at the root of the
# downloaded snapshot — confirm against the layout of HF_REPO.
MODEL_PATH = model_dir
CSV_PATH = os.path.join(model_dir, "muril_multilingual_dataset.csv")
EMBED_PATH = os.path.join(model_dir, "answer_embeddings.pt")

# --- Load resources ---
print("⚙️ Loading model and embeddings...")
# Pin the encoder to CPU so query embeddings live on the same device as the
# answer embeddings, which are mapped to CPU below.
model = SentenceTransformer(MODEL_PATH, device="cpu")
df = pd.read_csv(CSV_PATH).dropna(subset=['question', 'answer'])
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered file in the downloaded snapshot cannot execute arbitrary code.
answer_embeddings = torch.load(EMBED_PATH, map_location="cpu", weights_only=True)
# Rows are matched to embeddings by position; a size mismatch would silently
# return answers for the wrong rows, so fail fast at start-up instead.
if len(df) != len(answer_embeddings):
    raise RuntimeError(
        f"Dataset has {len(df)} usable rows but {EMBED_PATH} holds "
        f"{len(answer_embeddings)} embeddings; regenerate the embeddings."
    )
print("✅ Model and embeddings loaded successfully.")

# --- FastAPI Setup ---
app = FastAPI(title="MuRIL Multilingual QA API")
35
-
36
# Imported next to its only use so this block is self-contained.
from typing import Optional


class QueryRequest(BaseModel):
    """Request body for POST /get-answer."""

    # Free-text question to look up.
    question: str
    # Optional language value matched against the dataset's 'lang' column.
    # Declared Optional so that an explicit JSON null is accepted as well as
    # an omitted field (a plain `str` annotation rejects null under pydantic v2).
    lang: Optional[str] = None
39
-
40
class QAResponse(BaseModel):
    """Response body for POST /get-answer: a single answer string."""

    answer: str
42
-
43
@app.get("/")
def root():
    """Return a fixed status payload for GET /."""
    status_payload = {
        "status": "✅ API ready",
        "model_loaded": True,
    }
    return status_payload
46
-
47
@app.post("/get-answer", response_model=QAResponse)
def get_answer_endpoint(request: QueryRequest):
    """Return the dataset answer whose embedding is closest to the question.

    The question is encoded with the module-level ``model`` and compared by
    cosine similarity against ``answer_embeddings``. When ``request.lang`` is
    set and the dataset has a 'lang' column, only rows with that exact value
    are considered.

    Args:
        request: Parsed request body with ``question`` and optional ``lang``.

    Returns:
        A dict with one key, "answer": the best-matching answer, or an
        explanatory message when there are no candidate rows to search.
    """
    question_text = request.question.strip()
    lang_filter = request.lang

    candidates = df
    candidate_embeddings = answer_embeddings
    if lang_filter and 'lang' in df.columns:
        # Positional boolean mask, so it selects the same rows from the
        # dataframe and from the embedding tensor.
        keep = (df['lang'] == lang_filter).to_numpy()
        candidates = df[keep].reset_index(drop=True)
        candidate_embeddings = answer_embeddings[torch.tensor(keep)]
        if candidates.empty:
            return {"answer": f"No data found for language '{lang_filter}'."}

    # Guard the unfiltered path too: argmax over an empty score tensor raises.
    if candidates.empty:
        return {"answer": "No data available."}

    question_emb = model.encode(question_text, convert_to_tensor=True)
    # The similarity computation needs both operands on one device.
    question_emb = question_emb.to(candidate_embeddings.device)
    cosine_scores = util.pytorch_cos_sim(question_emb, candidate_embeddings)
    best_idx = torch.argmax(cosine_scores).item()
    # str() keeps the response valid if pandas parsed the answer cell as a
    # non-string value (e.g. a number).
    answer = str(candidates.iloc[best_idx]['answer'])
    return {"answer": answer}
67
-
68
if __name__ == "__main__":
    # Direct execution: serve the app on all interfaces, port 8080.
    from uvicorn import run as serve

    serve("main:app", host="0.0.0.0", port=8080)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/1_Pooling/config.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "word_embedding_dimension": 768,
3
- "pooling_mode_cls_token": false,
4
- "pooling_mode_mean_tokens": true,
5
- "pooling_mode_max_tokens": false,
6
- "pooling_mode_mean_sqrt_len_tokens": false,
7
- "pooling_mode_weightedmean_tokens": false,
8
- "pooling_mode_lasttoken": false,
9
- "include_prompt": true
10
- }
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/config.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "architectures": [
3
- "BertModel"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "classifier_dropout": null,
7
- "dtype": "float32",
8
- "embedding_size": 768,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 3072,
14
- "layer_norm_eps": 1e-12,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 0,
20
- "position_embedding_type": "absolute",
21
- "transformers_version": "4.56.2",
22
- "type_vocab_size": 2,
23
- "use_cache": true,
24
- "vocab_size": 197285
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/config_sentence_transformers.json DELETED
@@ -1,14 +0,0 @@
1
- {
2
- "model_type": "SentenceTransformer",
3
- "__version__": {
4
- "sentence_transformers": "5.1.1",
5
- "transformers": "4.56.2",
6
- "pytorch": "2.8.0+cu126"
7
- },
8
- "prompts": {
9
- "query": "",
10
- "document": ""
11
- },
12
- "default_prompt_name": null,
13
- "similarity_fn_name": "cosine"
14
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/modules.json DELETED
@@ -1,14 +0,0 @@
1
- [
2
- {
3
- "idx": 0,
4
- "name": "0",
5
- "path": "",
6
- "type": "sentence_transformers.models.Transformer"
7
- },
8
- {
9
- "idx": 1,
10
- "name": "1",
11
- "path": "1_Pooling",
12
- "type": "sentence_transformers.models.Pooling"
13
- }
14
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/sentence_bert_config.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "max_seq_length": 512,
3
- "do_lower_case": false
4
- }
 
 
 
 
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/tokenizer_config.json DELETED
@@ -1,59 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "100": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "103": {
20
- "content": "[MASK]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "104": {
28
- "content": "[CLS]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "105": {
36
- "content": "[SEP]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "clean_up_tokenization_spaces": true,
45
- "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
- "do_lower_case": false,
48
- "extra_special_tokens": {},
49
- "lowercase": false,
50
- "mask_token": "[MASK]",
51
- "model_max_length": 512,
52
- "never_split": null,
53
- "pad_token": "[PAD]",
54
- "sep_token": "[SEP]",
55
- "strip_accents": false,
56
- "tokenize_chinese_chars": true,
57
- "tokenizer_class": "BertTokenizer",
58
- "unk_token": "[UNK]"
59
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/muril_combined_multilingual_model/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
Muril-Model/Muril-Model/muril_multilingual_dataset.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09cca0ed98f57664e558825059722272d15fe99f5238969e95f523629fb50cec
3
- size 16996056
 
 
 
 
Muril-Model/Muril-Model/precompute_embeddings.py DELETED
@@ -1,17 +0,0 @@
1
- import torch
2
- import pandas as pd
3
- from sentence_transformers import SentenceTransformer
4
-
5
# Input and output locations, relative to the current working directory.
MODEL_PATH = './muril_combined_multilingual_model'
CSV_PATH = './muril_multilingual_dataset.csv'
EMB_PATH = './answer_embeddings.pt'

print("🔄 Precomputing embeddings...")
model = SentenceTransformer(MODEL_PATH)

# Drop rows missing either a question or an answer before encoding.
df = pd.read_csv(CSV_PATH)
df = df.dropna(subset=['question', 'answer'])

# Encode every remaining answer and persist the resulting tensor.
answers = df['answer'].tolist()
answer_embeddings = model.encode(answers, convert_to_tensor=True)
torch.save(answer_embeddings, EMB_PATH)

print(f"✅ Saved {len(answers)} embeddings to {EMB_PATH}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Muril-Model/Muril-Model/requirements.txt DELETED
@@ -1,12 +0,0 @@
1
- fastapi==0.118.0
2
- uvicorn==0.37.0
3
- torch==2.1.0
4
- sentence-transformers==5.1.1
5
- transformers==4.43.3
6
- numpy<2
7
- pandas==2.1.1
8
- langdetect==1.0.9
9
- requests==2.31.0
10
- tqdm==4.65.0
11
- PyMuPDF==1.23.0
12
- huggingface_hub==0.23.4