SujathaL committed on
Commit
2a2dd2f
·
verified ·
1 Parent(s): 243c4ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -3,11 +3,15 @@ import pdfplumber
3
  import faiss
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
- from transformers import pipeline
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
 
9
- # Load Models
10
- qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 
 
 
 
11
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
12
 
13
  # Function to Extract & Clean PDF Text
@@ -36,6 +40,13 @@ def find_best_chunk(question, index, chunks, embeddings):
36
  _, closest_idx = index.search(np.array(question_embedding), 1)
37
  return chunks[closest_idx[0][0]]
38
 
 
 
 
 
 
 
 
39
  # Streamlit UI
40
  st.title("Chat with AWS Restart PDF (Like ChatPDF)")
41
 
@@ -52,5 +63,5 @@ question = st.text_input("Ask a question about AWS Restart program:")
52
 
53
  if st.button("Get Answer") and question:
54
  relevant_chunk = find_best_chunk(question, index, chunks, embeddings)
55
- response = qa_pipeline(question=question, context=relevant_chunk)
56
- st.write("Answer:", response['answer'])
 
3
  import faiss
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
 
9
# Load Mistral-7B Instruct for generative answers.
# NOTE(review): the commit used "mistralai/Mistral-7B-Instruct", which is not a
# valid Hub repo id — the instruct checkpoints are versioned. Using v0.2 here;
# confirm the intended version against the Hugging Face Hub.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Sentence-embedding model used for retrieval (context matching).
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
16
 
17
  # Function to Extract & Clean PDF Text
 
40
  _, closest_idx = index.search(np.array(question_embedding), 1)
41
  return chunks[closest_idx[0][0]]
42
 
43
# Generate a detailed answer with the causal LM, grounded in the retrieved context.
def get_answer(question, context):
    """Return a generated answer for ``question`` based on ``context``.

    Fixes over the committed version:
    - ``do_sample=True`` is required for ``temperature`` to have any effect;
      without it, generation is greedy and the temperature is silently ignored.
    - ``max_new_tokens`` bounds the *answer* length. The original
      ``max_length=300`` counted the prompt too, so a long retrieved chunk
      could leave no room to generate anything.
    - Only the newly generated tokens are decoded; decoding ``output[0]``
      echoed the whole prompt back inside the returned answer.
    """
    prompt = f"Question: {question}\nContext: {context}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    output = model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.7,
    )
    # Slice off the prompt tokens so only the model's answer is returned.
    generated_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
49
+
50
  # Streamlit UI
51
  st.title("Chat with AWS Restart PDF (Like ChatPDF)")
52
 
 
63
 
64
# On click: retrieve the best-matching chunk for the question, then let the
# generative model produce an answer from it.
if st.button("Get Answer") and question:
    best_chunk = find_best_chunk(question, index, chunks, embeddings)
    answer = get_answer(question, best_chunk)
    st.write("Answer:", answer)