Amritpal Singh committed on
Commit
d6fd044
·
1 Parent(s): aac7268

Added all project files

Browse files
Dockerfile CHANGED
@@ -2,20 +2,19 @@ FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- software-properties-common \
9
- git \
10
- && rm -rf /var/lib/apt/lists/*
11
 
12
- COPY requirements.txt ./
13
- COPY src/ ./src/
 
 
14
 
15
- RUN pip3 install -r requirements.txt
 
16
 
 
17
  EXPOSE 8501
18
 
19
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
-
21
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
2
 
3
WORKDIR /app

# git lets transformers fetch models/repos from the Hugging Face Hub;
# curl is required by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends git curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements.txt first and install dependencies with no cache, so the
# dependency layer is cached independently of app-code changes and the image
# stays small.
COPY requirements.txt .
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# Copy all app files
COPY . .

# Expose Streamlit default port
EXPOSE 8501

# Health check (present in the previous Dockerfile revision, dropped by this
# rewrite): Streamlit serves a liveness endpoint at /_stcore/health.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1

# Run Streamlit app
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import BertForQuestionAnswering, BertTokenizer
4
+
5
# Set page config (must run before any other Streamlit call)
st.set_page_config(page_title="BERT Question Answering System", layout="centered")


# st.cache_resource memoises the (model, tokenizer) pair across Streamlit
# reruns, so the weights are loaded only once per process.
@st.cache_resource
def load_model(model_name: str = 'bert-base-uncased'):
    """Load a BERT question-answering model and its tokenizer.

    Args:
        model_name: Hugging Face model id or local checkpoint path. Defaults
            to 'bert-base-uncased' for backward compatibility.
            NOTE(review): 'bert-base-uncased' has no fine-tuned QA head, so
            answers will be poor; this commit also adds a qa_model/ directory
            that is presumably the intended fine-tuned checkpoint — confirm
            and pass its path here.

    Returns:
        (model, tokenizer) tuple.
    """
    model = BertForQuestionAnswering.from_pretrained(model_name)
    tokenizer = BertTokenizer.from_pretrained(model_name)
    return model, tokenizer


model, tokenizer = load_model()
16
+
17
# Function to get answer
def get_answer(question, context):
    """Extract the answer span for `question` from `context` using BERT.

    Args:
        question: The question string.
        context: The passage to search for an answer.

    Returns:
        The decoded answer string, or "" when the model does not predict a
        valid span (the caller treats a falsy answer as "not found").
    """
    # Encode question+context as one pair; BERT accepts at most 512 tokens,
    # so long contexts are truncated (an answer past the cut is lost).
    inputs = tokenizer.encode_plus(question, context, return_tensors='pt', max_length=512, truncation=True)
    input_ids = inputs['input_ids'].tolist()[0]

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Most likely start/end token positions; end is made exclusive for slicing.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1

    # Fix: the model may predict end <= start (no coherent span); the original
    # code then returned an empty or garbled slice. Report "no answer" instead.
    if answer_end <= answer_start:
        return ""

    answer = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    )

    # Fix: a span collapsing onto special tokens ([CLS]/[SEP]) is BERT's
    # "no answer" convention — strip them so the caller's `if answer:` check
    # correctly reports "No answer found".
    return answer.replace('[CLS]', '').replace('[SEP]', '').strip()
33
+
34
# App title and description
st.title("🤖 BERT Question Answering System")
st.write("This app uses BERT to answer questions based on a given context.")

# Input sections (Streamlit reruns the whole script on each interaction;
# these widgets return their current values on every rerun)
context = st.text_area("📄 Enter the context/passage:", height=200)
question = st.text_input("❓ Ask a question about the context:")

# Answer button — True only on the rerun triggered by the click
if st.button("Get Answer"):
    if not context or not question:
        # Guard: both fields must be non-empty before running the model
        st.warning("Please provide both a context and a question.")
    else:
        try:
            answer = get_answer(question, context)
            if answer:
                st.success(f"📄 Answer: {answer}")
            else:
                # get_answer returned a falsy/empty string — no usable span
                st.warning("No answer found in the given context.")
        except Exception as e:
            # Broad catch is acceptable at this top-level UI boundary: surface
            # the error to the user instead of crashing the Streamlit session.
            st.error(f"An error occurred: {str(e)}")

# Add some styling — raw CSS injected into the page, hence unsafe_allow_html
st.markdown("""
<style>
.stTextInput input, .stTextArea textarea {
    font-size: 16px !important;
}
.stButton button {
    background-color: #4CAF50;
    color: white;
    font-weight: bold;
    padding: 0.5rem 1rem;
    border-radius: 5px;
}
.stButton button:hover {
    background-color: #45a049;
}
</style>
""", unsafe_allow_html=True)

# Footer
st.markdown("---")
st.markdown("Built with ❤️ using Streamlit and HuggingFace Transformers")
qa_model/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForQuestionAnswering"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.52.4",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
qa_model/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
qa_model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_model/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
qa_model/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
- altair
2
- pandas
3
- streamlit
 
 
1
+ streamlit
2
+ torch
3
+ transformers
4
+ pandas