Update app.py
Browse files
app.py
CHANGED
|
@@ -3,11 +3,15 @@ import pdfplumber
|
|
| 3 |
import faiss
|
| 4 |
import numpy as np
|
| 5 |
from sentence_transformers import SentenceTransformer
|
| 6 |
-
from transformers import
|
| 7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 8 |
|
| 9 |
-
# Load
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 12 |
|
| 13 |
# Function to Extract & Clean PDF Text
|
|
@@ -36,6 +40,13 @@ def find_best_chunk(question, index, chunks, embeddings):
|
|
| 36 |
_, closest_idx = index.search(np.array(question_embedding), 1)
|
| 37 |
return chunks[closest_idx[0][0]]
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# Streamlit UI
|
| 40 |
st.title("Chat with AWS Restart PDF (Like ChatPDF)")
|
| 41 |
|
|
@@ -52,5 +63,5 @@ question = st.text_input("Ask a question about AWS Restart program:")
|
|
| 52 |
|
| 53 |
if st.button("Get Answer") and question:
|
| 54 |
relevant_chunk = find_best_chunk(question, index, chunks, embeddings)
|
| 55 |
-
response =
|
| 56 |
-
st.write("Answer:", response
|
|
|
|
| 3 |
import faiss
|
| 4 |
import numpy as np
|
| 5 |
from sentence_transformers import SentenceTransformer
|
| 6 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 8 |
|
| 9 |
+
# --- Model setup --------------------------------------------------------
# NOTE(review): "mistralai/Mistral-7B-Instruct" is not a valid Hugging Face
# repo id — released checkpoints are versioned (e.g.
# "mistralai/Mistral-7B-Instruct-v0.2"). Confirm against the Hub before
# deploying; from_pretrained() will raise on an unknown repo.
model_name = "mistralai/Mistral-7B-Instruct"  # generative answer model


@st.cache_resource(show_spinner="Loading language model...")
def _load_llm(name):
    """Load the tokenizer and causal LM exactly once per server process.

    Streamlit reruns this whole script on every widget interaction, so an
    uncached top-level ``from_pretrained`` would reload a multi-GB
    checkpoint on each click. ``st.cache_resource`` keeps one shared copy.
    """
    tok = AutoTokenizer.from_pretrained(name)
    lm = AutoModelForCausalLM.from_pretrained(name)
    return tok, lm


tokenizer, model = _load_llm(model_name)


@st.cache_resource(show_spinner=False)
def _load_embedder():
    """Load the sentence-embedding model once; used for context matching."""
    return SentenceTransformer("all-MiniLM-L6-v2")


# Load Sentence Embeddings Model for Better Context Matching
embedding_model = _load_embedder()
|
| 16 |
|
| 17 |
# Function to Extract & Clean PDF Text
|
|
|
|
| 40 |
_, closest_idx = index.search(np.array(question_embedding), 1)
|
| 41 |
return chunks[closest_idx[0][0]]
|
| 42 |
|
| 43 |
+
# Function to Generate a Long, Detailed Answer
def get_answer(question, context):
    """Generate an answer to *question* grounded in *context*.

    Builds a plain QA prompt, runs the causal LM, and returns only the
    newly generated text.

    Parameters
    ----------
    question : str
        The user's question.
    context : str
        The retrieved document chunk to ground the answer in.

    Returns
    -------
    str
        The model's answer, with the prompt echo and special tokens removed.
    """
    input_text = f"Question: {question}\nContext: {context}\nAnswer:"
    inputs = tokenizer(input_text, return_tensors="pt")
    # max_new_tokens (not max_length) so a long context cannot consume the
    # entire generation budget; do_sample=True is required for temperature
    # to take effect (transformers ignores temperature under greedy search).
    output = model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.7,
    )
    # Decode only the tokens generated *after* the prompt — decoding
    # output[0] wholesale would echo the question and context back.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
|
| 49 |
+
|
| 50 |
# --- Streamlit UI -------------------------------------------------------
# Page heading shown at the top of the chat interface.
_PAGE_TITLE = "Chat with AWS Restart PDF (Like ChatPDF)"
st.title(_PAGE_TITLE)
|
| 52 |
|
|
|
|
| 63 |
|
| 64 |
# Render the submit button unconditionally (Streamlit widgets must be
# called on every rerun to appear), then answer only when it was clicked
# and the user actually typed a question.
clicked = st.button("Get Answer")
if clicked and question:
    # Retrieve the most relevant chunk, then generate a grounded answer.
    relevant_chunk = find_best_chunk(question, index, chunks, embeddings)
    response = get_answer(question, relevant_chunk)
    st.write("Answer:", response)
|