Spaces:

nabin2004
/

finalShree5GPT

Build error

App Files Files Community

nabin2004 commited on Oct 1, 2023

Commit

98eb250

1 Parent(s): cdac4f1

done done

Browse files

Files changed (1) hide show

app.py +31 -18

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
 def find_answer(question, context, model, tokenizer):
     # Tokenize the input question and context
-    inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")
     input_ids = inputs["input_ids"].tolist()[0]
     # Get the logits for the start and end positions
@@ -22,27 +22,40 @@ def find_answer(question, context, model, tokenizer):
     answer = tokenizer.decode(input_ids[start_idx:end_idx+1], skip_special_tokens=True)
     return answer
 def main():
     st.title("Shree5 GPT: By Tech Ninja Group")
-    text = st.text_area('Enter your citizenship-related question:', max_chars=512)
-    # Load the DistilBERT model and tokenizer
-    model_name = "distilbert-base-uncased"
-    tokenizer = DistilBertTokenizer.from_pretrained(model_name)
-    model = DistilBertForQuestionAnswering.from_pretrained(model_name)
-    # Read the context from the text file
-    with open("nepal_citizenship.txt", "r") as file:
-        context = file.read()
     if text:
-        answer = find_answer(text, context, model, tokenizer)
-        if answer:
-            st.markdown(f"**Answer:** {answer}")
-        else:
-            st.markdown("No answer found in the provided context for the given question.")
 if __name__ == "__main__":
     main()

 def find_answer(question, context, model, tokenizer):
     # Tokenize the input question and context
+    inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt", max_length=512, truncation=True)
     input_ids = inputs["input_ids"].tolist()[0]
     # Get the logits for the start and end positions
     answer = tokenizer.decode(input_ids[start_idx:end_idx+1], skip_special_tokens=True)
     return answer
+def read_file(file_path):
+    with open(file_path, "rb") as f:
+        raw_data = f.read()
+        result = chardet.detect(raw_data)
+        encoding = result['encoding']
+    with open(file_path, "r", encoding=encoding) as file:
+        context = file.read()
+    return context
 def main():
     st.title("Shree5 GPT: By Tech Ninja Group")
+    text = st.text_input('Enter your citizenship-related question:')
     if text:
+        # Load the DistilBERT model and tokenizer
+        model_name = "distilbert-base-uncased"
+        tokenizer = DistilBertTokenizer.from_pretrained(model_name)
+        model = DistilBertForQuestionAnswering.from_pretrained(model_name)
+        # Read the context from the text file
+        context = read_file("inputed.txt")
+        # Split the text into smaller segments if needed
+        segments = [context[i:i+512] for i in range(0, len(context), 512)]
+        answers = []
+        for i, segment in enumerate(segments):
+            st.write(f"Generating answer for segment {i+1}...")
+            answer = find_answer(text, segment, model, tokenizer)
+            answers.append(answer)
+        st.write("Answer:")
+        for i, answer in enumerate(answers):
+            st.write(f"Segment {i+1}: {answer}")
 if __name__ == "__main__":
     main()