Amritpal Singh committed
Commit d6fd044 · Parent(s): aac7268
Added all project files

Files changed:
- Dockerfile +11 -12
- app.py +77 -0
- qa_model/config.json +25 -0
- qa_model/special_tokens_map.json +7 -0
- qa_model/tokenizer.json +0 -0
- qa_model/tokenizer_config.json +56 -0
- qa_model/vocab.txt +0 -0
- requirements.txt +4 -3
Dockerfile
CHANGED

@@ -2,20 +2,19 @@ FROM python:3.9-slim

 WORKDIR /app

-RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
-    software-properties-common \
-    git \
-    && rm -rf /var/lib/apt/lists/*
+# Install git (optional, useful for huggingface model downloads)
+RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*

-COPY requirements.txt ./
-COPY src/ ./src/
+# Copy requirements.txt and install dependencies with no cache to reduce image size
+COPY requirements.txt .
+RUN pip install --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt

-RUN pip3 install -r requirements.txt
+# Copy all app files
+COPY . .

+# Expose Streamlit default port
 EXPOSE 8501

-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
+# Run Streamlit app
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
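The rewrite replaces the template's src/streamlit_app.py ENTRYPOINT with a root-level app.py run via CMD. As a minimal follow-up sketch, not part of the commit: the two pip RUN instructions each add an image layer and could be collapsed into one, with --no-cache-dir applied to the pip upgrade as well:

RUN pip install --no-cache-dir --upgrade pip \
 && pip install --no-cache-dir -r requirements.txt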
app.py
ADDED

@@ -0,0 +1,77 @@
+import streamlit as st
+import torch
+from transformers import BertForQuestionAnswering, BertTokenizer
+
+# Set page config
+st.set_page_config(page_title="BERT Question Answering System", layout="centered")
+
+# Load model and tokenizer
+@st.cache_resource
+def load_model():
+    model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    return model, tokenizer
+
+model, tokenizer = load_model()
+
+# Function to get answer
+def get_answer(question, context):
+    inputs = tokenizer.encode_plus(question, context, return_tensors='pt', max_length=512, truncation=True)
+    input_ids = inputs['input_ids'].tolist()[0]
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    answer_start = torch.argmax(outputs.start_logits)
+    answer_end = torch.argmax(outputs.end_logits) + 1
+
+    answer = tokenizer.convert_tokens_to_string(
+        tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
+    )
+
+    return answer
+
+# App title and description
+st.title("🤖 BERT Question Answering System")
+st.write("This app uses BERT to answer questions based on a given context.")
+
+# Input sections
+context = st.text_area("📄 Enter the context/passage:", height=200)
+question = st.text_input("❓ Ask a question about the context:")
+
+# Answer button
+if st.button("Get Answer"):
+    if not context or not question:
+        st.warning("Please provide both a context and a question.")
+    else:
+        try:
+            answer = get_answer(question, context)
+            if answer:
+                st.success(f"📄 Answer: {answer}")
+            else:
+                st.warning("No answer found in the given context.")
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
+
+# Add some styling
+st.markdown("""
+<style>
+    .stTextInput input, .stTextArea textarea {
+        font-size: 16px !important;
+    }
+    .stButton button {
+        background-color: #4CAF50;
+        color: white;
+        font-weight: bold;
+        padding: 0.5rem 1rem;
+        border-radius: 5px;
+    }
+    .stButton button:hover {
+        background-color: #45a049;
+    }
+</style>
+""", unsafe_allow_html=True)
+
+# Footer
+st.markdown("---")
+st.markdown("Built with ❤️ using Streamlit and HuggingFace Transformers")
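Note that load_model() above pulls the stock bert-base-uncased checkpoint, whose question-answering head is randomly initialized, even though the commit also ships a qa_model/ directory with config and tokenizer files. A minimal sketch of pointing the loader at that directory instead, assuming fine-tuned weights (e.g. a model.safetensors, which this commit does not include) are placed alongside:

import streamlit as st
from transformers import BertForQuestionAnswering, BertTokenizer

@st.cache_resource
def load_model():
    # from_pretrained accepts a local directory holding config.json,
    # the tokenizer files, and a weight file
    model = BertForQuestionAnswering.from_pretrained("qa_model")
    tokenizer = BertTokenizer.from_pretrained("qa_model")
    return model, tokenizer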
qa_model/config.json
ADDED

@@ -0,0 +1,25 @@
+{
+  "architectures": [
+    "BertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
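The config describes the standard bert-base-uncased geometry (12 layers, 12 heads, hidden size 768, vocab size 30522) with a BertForQuestionAnswering head. A quick sketch for inspecting it, assuming qa_model/ sits in the working directory; no weight files are needed for this:

from transformers import AutoConfig

# Reads qa_model/config.json only
config = AutoConfig.from_pretrained("qa_model")
print(config.model_type, config.num_hidden_layers, config.hidden_size)
# -> bert 12 768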
qa_model/special_tokens_map.json
ADDED

@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
qa_model/tokenizer.json
ADDED

The diff for this file is too large to render. See raw diff.
qa_model/tokenizer_config.json
ADDED

@@ -0,0 +1,56 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
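Together with vocab.txt and tokenizer.json, these files are enough to build the tokenizer locally, independent of the hub download in app.py. A minimal sketch, assuming qa_model/ sits next to the script:

from transformers import BertTokenizer

# Reads tokenizer_config.json, special_tokens_map.json, and vocab.txt
tokenizer = BertTokenizer.from_pretrained("qa_model")
enc = tokenizer("What is BERT?", "BERT is a language model.")
print(tokenizer.convert_ids_to_tokens(enc["input_ids"])[:5])
# -> ['[CLS]', 'what', 'is', 'bert', '?']  (per do_lower_case: true)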
qa_model/vocab.txt
ADDED

The diff for this file is too large to render. See raw diff.
requirements.txt
CHANGED

@@ -1,3 +1,4 @@
-altair
-pandas
-streamlit
+streamlit
+torch
+transformers
+pandas
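None of the dependencies are pinned, so a future build may pull a transformers release that reads the saved model files differently. A hedged variant, assuming one wants to match the version the files were written with (4.52.4 per qa_model/config.json):

streamlit
torch
transformers==4.52.4
pandas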