nabin2004 committed on
Commit
98eb250
·
1 Parent(s): cdac4f1
Files changed (1) hide show
  1. app.py +31 -18
app.py CHANGED
@@ -4,7 +4,7 @@ from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
4
 
5
  def find_answer(question, context, model, tokenizer):
6
  # Tokenize the input question and context
7
- inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")
8
  input_ids = inputs["input_ids"].tolist()[0]
9
 
10
  # Get the logits for the start and end positions
@@ -22,27 +22,40 @@ def find_answer(question, context, model, tokenizer):
22
  answer = tokenizer.decode(input_ids[start_idx:end_idx+1], skip_special_tokens=True)
23
  return answer
24
 
 
 
 
 
 
 
 
 
 
25
  def main():
26
  st.title("Shree5 GPT: By Tech Ninja Group")
27
 
28
- text = st.text_area('Enter your citizenship-related question:', max_chars=512)
29
-
30
- # Load the DistilBERT model and tokenizer
31
- model_name = "distilbert-base-uncased"
32
- tokenizer = DistilBertTokenizer.from_pretrained(model_name)
33
- model = DistilBertForQuestionAnswering.from_pretrained(model_name)
34
-
35
- # Read the context from the text file
36
- with open("nepal_citizenship.txt", "r") as file:
37
- context = file.read()
38
-
39
  if text:
40
- answer = find_answer(text, context, model, tokenizer)
41
-
42
- if answer:
43
- st.markdown(f"**Answer:** {answer}")
44
- else:
45
- st.markdown("No answer found in the provided context for the given question.")
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  if __name__ == "__main__":
48
  main()
 
4
 
5
  def find_answer(question, context, model, tokenizer):
6
  # Tokenize the input question and context
7
+ inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt", max_length=512, truncation=True)
8
  input_ids = inputs["input_ids"].tolist()[0]
9
 
10
  # Get the logits for the start and end positions
 
22
  answer = tokenizer.decode(input_ids[start_idx:end_idx+1], skip_special_tokens=True)
23
  return answer
24
 
25
def read_file(file_path):
    """Read a text file whose encoding is not known in advance.

    The file is read once as bytes; the encoding is guessed with ``chardet``
    when that package is importable, and the bytes are then decoded in
    memory (no second trip to disk).

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file contents as ``str``, with universal-newline
        translation applied (``\\r\\n`` and ``\\r`` become ``\\n``), exactly as
        a text-mode ``open`` would do.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import codecs
    import io

    # Imported here so this function does not depend on a module-level
    # ``import chardet`` being present; fall back gracefully if it is missing.
    try:
        import chardet
    except ImportError:
        chardet = None

    with open(file_path, "rb") as f:
        raw_data = f.read()

    encoding = None
    if chardet is not None and raw_data:
        # ``detect`` reports ``encoding`` as None when it cannot decide.
        encoding = chardet.detect(raw_data).get("encoding")

    if encoding:
        try:
            codecs.lookup(encoding)
        except LookupError:
            # Detector returned a name Python has no codec for.
            encoding = None
    if not encoding:
        encoding = "utf-8"

    # errors="replace": a wrong guess should degrade to a few U+FFFD
    # characters rather than abort the whole request.
    with io.TextIOWrapper(io.BytesIO(raw_data), encoding=encoding, errors="replace") as reader:
        return reader.read()
33
+
34
def main():
    """Run the Streamlit question-answering page.

    Shows a text input for the question. Once a question is entered, loads
    the DistilBERT tokenizer and model, reads the context from
    ``inputed.txt``, splits it into fixed-size character segments, asks
    ``find_answer`` about each segment, and displays every non-empty answer
    labelled with its segment number. If no segment yields an answer, a
    "no answer found" message is shown instead.
    """
    # Size of each context slice, in characters (not tokens); find_answer
    # additionally truncates the tokenized input to 512 tokens.
    segment_chars = 512

    st.title("Shree5 GPT: By Tech Ninja Group")

    text = st.text_input('Enter your citizenship-related question:')
    if not text:
        return

    # Load the DistilBERT model and tokenizer.
    # NOTE(review): "distilbert-base-uncased" is the base checkpoint; it
    # presumably has no trained question-answering head, so a SQuAD
    # fine-tuned checkpoint may be what is intended here -- confirm.
    model_name = "distilbert-base-uncased"
    tokenizer = DistilBertTokenizer.from_pretrained(model_name)
    model = DistilBertForQuestionAnswering.from_pretrained(model_name)

    # Read the context from the text file
    context = read_file("inputed.txt")

    # Split the text into smaller segments so each fits the model input
    segments = [
        context[start:start + segment_chars]
        for start in range(0, len(context), segment_chars)
    ]

    answers = []
    for number, segment in enumerate(segments, start=1):
        st.write(f"Generating answer for segment {number}...")
        answer = find_answer(text, segment, model, tokenizer)
        # Keep the original segment number so labels stay meaningful
        # after blank answers are dropped.
        if answer and answer.strip():
            answers.append((number, answer))

    if not answers:
        st.markdown("No answer found in the provided context for the given question.")
        return

    st.write("Answer:")
    for number, answer in answers:
        st.write(f"Segment {number}: {answer}")
59
 
60
  if __name__ == "__main__":
61
  main()