RAG-based AI recommendation system — a Streamlit app using SBERT embeddings, a FAISS index, and T5 generation.
| import pandas as pd | |
| import faiss | |
| from sentence_transformers import SentenceTransformer | |
| from transformers import pipeline | |
| import streamlit as st | |
# Load models
def load_embedding_model():
    """Return the Sentence-BERT encoder used to embed rows and queries."""
    model_name = 'all-MiniLM-L6-v2'  # compact SBERT model
    return SentenceTransformer(model_name)
def load_text_generator():
    """Return the t5-small text2text pipeline used to draft answers."""
    return pipeline("text2text-generation", model="t5-small")
def load_rephrasing_model():
    """Return the t5-base text2text pipeline used to rephrase drafts."""
    return pipeline("text2text-generation", model="t5-base")
# Instantiate the shared models once at module import.
# NOTE(review): Streamlit re-executes the script on every interaction, so
# these reload each rerun — consider wrapping the loaders with
# @st.cache_resource; confirm against the deployment target.
embedding_model = load_embedding_model()    # SBERT encoder
text_generator = load_text_generator()      # t5-small answer generator
rephrasing_model = load_rephrasing_model()  # t5-base rephraser
# Preprocess and embed data
def preprocess_and_embed(data):
    """Flatten each DataFrame row into one string and embed all rows.

    Returns a pair (text_data, embeddings): the per-row strings as a pandas
    Series and the SBERT sentence embeddings as a 2-D numpy array.
    """
    row_texts = data.astype(str).apply(" ".join, axis=1)  # one string per row
    vectors = embedding_model.encode(row_texts.tolist(), convert_to_numpy=True)
    return row_texts, vectors
# Create FAISS index
def create_faiss_index(embeddings):
    """Build an exact (brute-force) L2-distance FAISS index over *embeddings*."""
    n_dims = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(n_dims)
    faiss_index.add(embeddings)
    return faiss_index
# Retrieve recommendations
def retrieve_recommendations(query, text_data, index, embeddings, k=5):
    """Return the k rows of *text_data* nearest to *query* in embedding space.

    Parameters:
        query: free-text query string.
        text_data: pandas Series of row strings, aligned positionally with
            the vectors stored in *index*.
        index: FAISS index built over the row embeddings.
        embeddings: kept for interface compatibility with existing callers;
            not used here (the index already holds the vectors).
        k: number of neighbours to return.

    Returns a list of {"text": str, "distance": float} dicts, nearest first.
    """
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)
    # FAISS pads missing neighbours with index -1 when k exceeds the number
    # of stored vectors, which would crash the lookup below — clamp k.
    k = min(k, index.ntotal)
    distances, indices = index.search(query_embedding, k)
    # Use .iloc: FAISS returns *positions*, and label-based Series access
    # (text_data[i]) only coincides with positions for a default RangeIndex.
    return [
        {"text": text_data.iloc[int(i)], "distance": float(d)}
        for i, d in zip(indices[0], distances[0])
    ]
# Generate contextual response based on query and context
def generate_response(query, recommendations, max_context_words=300):
    """Draft an answer to *query* grounded in the retrieved rows.

    Parameters:
        query: the user's query string.
        recommendations: list of {"text": ..., "distance": ...} dicts as
            returned by retrieve_recommendations.
        max_context_words: word budget for the concatenated context. t5-small
            silently truncates its input around 512 tokens, so an unbounded
            context can push the query and "Answer:" cue out of the window;
            the original code named its prompt "limited_context" but never
            actually limited it.

    Returns the generated answer text.
    """
    context = " ".join(rec["text"] for rec in recommendations)
    words = context.split()
    if len(words) > max_context_words:
        context = " ".join(words[:max_context_words])
    prompt = f"Context: {context}\nQuery: {query}\nAnswer:"
    response = text_generator(prompt, max_length=100, num_beams=3)
    return response[0]['generated_text']
# Rephrase the generated response
def rephrase_response(response):
    """Run the t5-base rephraser over *response* and return the cleaned text."""
    prompt = f"Rephrase this: {response}"
    outputs = rephrasing_model(prompt, max_length=100, num_beams=3)
    return outputs[0]['generated_text']
# Streamlit UI
st.title("RAG-Based AI Recommendation System (Using SBERT)")
uploaded_file = st.file_uploader("Upload CSV/Excel file", type=["csv", "xlsx"])
if uploaded_file:
    file_type = uploaded_file.name.split(".")[-1]
    if file_type == "csv":
        data = pd.read_csv(uploaded_file)
    else:
        # NOTE(review): .xlsx reading requires openpyxl at runtime — confirm
        # it is in the deployment requirements.
        data = pd.read_excel(uploaded_file)
    st.write("Data Preview:")
    st.write(data.head())
    # Streamlit re-executes this whole script on every interaction (including
    # each query submission), so the original code re-embedded the entire
    # dataset on every rerun. Cache the expensive embedding/index step in
    # session_state, keyed by the uploaded file's name, and reuse it until a
    # different file is uploaded.
    if st.session_state.get("indexed_file") != uploaded_file.name:
        # Preprocess and embed
        with st.spinner("Processing data and creating embeddings..."):
            text_data, embeddings = preprocess_and_embed(data)
            index = create_faiss_index(embeddings)
        st.session_state["indexed_file"] = uploaded_file.name
        st.session_state["text_data"] = text_data
        st.session_state["embeddings"] = embeddings
        st.session_state["faiss_index"] = index
        st.success("Data processed and embeddings created!")
    text_data = st.session_state["text_data"]
    embeddings = st.session_state["embeddings"]
    index = st.session_state["faiss_index"]
    # Query input
    query = st.text_input("Enter your query:")
    if query:
        # Retrieve recommendations
        with st.spinner("Retrieving recommendations..."):
            recommendations = retrieve_recommendations(query, text_data, index, embeddings)
        st.write("Top Recommendations:")
        for rec in recommendations:
            st.write(f"- {rec['text']} (Distance: {rec['distance']:.2f})")
        # Generate contextual response
        with st.spinner("Generating contextual recommendation..."):
            refined_response = generate_response(query, recommendations)
        # Rephrase the response
        with st.spinner("Rephrasing the recommendation for better grammar and clarity..."):
            final_response = rephrase_response(refined_response)
        st.write("Contextual Recommendation:")
        st.write(final_response)