import streamlit as st
import faiss
import pickle
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel, T5Tokenizer, T5ForConditionalGeneration
# Paths to the local index/data files; model weights are fetched from the Hugging Face Hub
FAISS_INDEX_PATH = "faiss_index_file.index"
TEXTS_PATH = "texts.pkl"
EMBEDDINGS_PATH = "embeddings_file.npy"
EMBEDDING_MODEL_NAME = "Ah1111/Embedding_Model"
GENERATOR_MODEL_NAME = "Ah1111/Generator_Model"
# Load generator model (T5); cached so it is loaded once per session, not on every rerun
@st.cache_resource
def load_llm():
    tokenizer = T5Tokenizer.from_pretrained(GENERATOR_MODEL_NAME)
    model = T5ForConditionalGeneration.from_pretrained(GENERATOR_MODEL_NAME)
    return tokenizer, model
# Load embedding model (custom Hugging Face model), also cached across reruns
@st.cache_resource
def load_embedding_model():
    tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
    model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME)
    return tokenizer, model
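
# NOTE: retrieval is only meaningful if this embedding model is the same one
# that was used to build the FAISS index; vectors from a different model live
# in a different space, so nearest-neighbour distances would be noise.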
# Load FAISS index, the retrievable texts, and the stored embeddings
@st.cache_resource
def load_faiss():
    faiss_index = faiss.read_index(FAISS_INDEX_PATH)
    with open(TEXTS_PATH, "rb") as f:
        data = pickle.load(f)  # list of text chunks, aligned with the index rows
    embeddings = np.load(EMBEDDINGS_PATH, allow_pickle=True)
    return faiss_index, data, embeddings
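
# For reference, a minimal sketch (an assumption about the offline pipeline,
# not code from the original app) of how the three files above could be built
# from a list of text chunks, reusing encode_query() defined below:
def build_index(chunks, tokenizer, model):
    vectors = np.vstack(
        [encode_query(text, tokenizer, model) for text in chunks]
    ).astype("float32")
    index = faiss.IndexFlatL2(vectors.shape[1])  # exact L2 nearest-neighbour search
    index.add(vectors)
    faiss.write_index(index, FAISS_INDEX_PATH)
    with open(TEXTS_PATH, "wb") as f:
        pickle.dump(chunks, f)
    np.save(EMBEDDINGS_PATH, vectors)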
# Encode a query by mean-pooling the embedding model's last hidden states
def encode_query(query, tokenizer, model):
    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        # Collapse the per-token vectors into one fixed-size query vector
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings.cpu().numpy()
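
# Optional variant (a sketch, not used above): plain .mean(dim=1) also averages
# over padding tokens, which matters when encoding several texts in one batch.
# Masked mean pooling excludes padding from the average:
def encode_batch(texts, tokenizer, model):
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state        # (batch, seq, dim)
    mask = inputs["attention_mask"].unsqueeze(-1).float() # 1 for real tokens, 0 for padding
    summed = (hidden * mask).sum(dim=1)                   # zero out padded positions
    counts = mask.sum(dim=1).clamp(min=1e-9)              # per-text real-token counts
    return (summed / counts).cpu().numpy()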
# Search top-k contexts
def search(query, tokenizer, model, index, data, k=5):
    query_embedding = encode_query(query, tokenizer, model).astype("float32")
    _, I = index.search(query_embedding, k)  # I holds the row ids of the nearest neighbours
    # FAISS returns -1 when fewer than k neighbours exist; skip those slots
    results = [data[i] for i in I[0] if i != -1]
    return results
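
# NOTE (assumption about the stored index): the search above uses whatever
# metric the index was built with. If it is an inner-product index intended
# for cosine similarity, the query vector must be unit-normalised first,
# e.g. faiss.normalize_L2(query_embedding) before index.search().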
# Generate a response by prompting the generator model with the retrieved context
def generate_response(context, query, tokenizer, model):
    input_text = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    inputs = tokenizer.encode(input_text, return_tensors="pt", truncation=True)
    # Sampling with a moderate temperature trades determinism for some variety
    outputs = model.generate(inputs, max_length=512, do_sample=True, temperature=0.7)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
# Streamlit app
def main():
    st.set_page_config(page_title="Clinical QA with RAG", page_icon="🩺")
    st.title("🩺 Clinical QA System (RAG + FAISS + T5)")
    st.markdown(
        """
        Enter your **clinical question** below.
        The system will retrieve relevant context and generate an informed answer using a locally running model. 🚀
        """
    )
    # Load models and files (each is cached after the first run)
    embed_tokenizer, embed_model = load_embedding_model()
    gen_tokenizer, gen_model = load_llm()
    faiss_index, data, embeddings = load_faiss()
    query = st.text_input("💬 Your Question:")
    if query:
        with st.spinner("🔍 Retrieving and Generating..."):
            contexts = search(query, embed_tokenizer, embed_model, faiss_index, data)
            combined_context = " ".join(contexts)
            response = generate_response(combined_context, query, gen_tokenizer, gen_model)
        st.success("✅ Answer Ready!")
        st.subheader("📝 Response:")
        st.write(response)

if __name__ == "__main__":
    main()
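
# Usage: with faiss_index_file.index, texts.pkl and embeddings_file.npy in the
# working directory, launch the app with
#   streamlit run app.py
# ("app.py" here stands for whatever this file is named).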