Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,35 +1,67 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
from transformers import
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
+
import faiss
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
# Load pre-trained multilingual model for retrieval and generation.
# NOTE(review): "facebook/rag-token-nq" is a RAG-*token* checkpoint, but it is
# loaded into RagSequenceForGeneration below — confirm whether
# "facebook/rag-sequence-nq" was intended.
tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
model = RagSequenceForGeneration.from_pretrained("facebook/rag-token-nq")
# NOTE(review): `retriever` is never used later in this file — retrieval is done
# via the custom FAISS index instead. Also, index_name="faiss" is not among the
# values the transformers docs list for RagRetriever ("legacy"/"exact"/
# "compressed"); verify this call actually succeeds before shipping.
retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="faiss")
|
| 11 |
+
|
| 12 |
+
# Set up FAISS for multilingual document retrieval
def setup_faiss(documents=None):
    """Build an in-memory FAISS index over a set of multilingual documents.

    Args:
        documents: Optional list of document strings to index. Defaults to a
            small built-in sample (the same question in English, French, and
            Urdu), preserving the original behavior when called with no args.

    Returns:
        (faiss_index, documents): the populated ``IndexFlatL2`` index and the
        document list, whose list positions correspond to FAISS ids.
    """
    if documents is None:
        # Example multilingual documents (original hard-coded corpus).
        documents = [
            "How to learn programming?",
            "Comment apprendre la programmation?",
            "پروگرامنگ سیکھنے کا طریقہ کیا ہے؟",
        ]

    # LaBSE produces language-agnostic sentence embeddings, so queries and
    # documents in different languages share one vector space.
    model_embed = SentenceTransformer('sentence-transformers/LaBSE')

    # Encode straight to numpy: FAISS only accepts numpy float32 input, so the
    # original torch-tensor round-trip (convert_to_tensor=True + np.array) was
    # unnecessary and would break for tensors on a GPU device.
    embeddings = model_embed.encode(documents, convert_to_numpy=True)
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(np.asarray(embeddings, dtype=np.float32))

    return faiss_index, documents

# Set up FAISS index
faiss_index, docs = setup_faiss()
|
| 32 |
+
|
| 33 |
+
# Retrieve documents based on query.
# Cache for the query embedder: the original code re-instantiated
# SentenceTransformer('sentence-transformers/LaBSE') on EVERY call, reloading
# the whole model per user query. Load it lazily, once.
_query_embedder = None

def retrieve_docs(query):
    """Return the indexed document most similar to *query*.

    Args:
        query: The user's question, in any language LaBSE supports.

    Returns:
        The single nearest document string from the module-level ``docs`` list.
    """
    global _query_embedder
    if _query_embedder is None:
        _query_embedder = SentenceTransformer('sentence-transformers/LaBSE')

    # Embed the query into the same vector space as the indexed documents
    # (numpy directly — FAISS needs numpy input anyway).
    query_embedding = _query_embedder.encode([query], convert_to_numpy=True)

    # Nearest-neighbour search; distances are not needed, only the best id.
    _distances, indices = faiss_index.search(
        np.asarray(query_embedding, dtype=np.float32), 1
    )

    # Get the most relevant document.
    return docs[indices[0][0]]
|
| 43 |
+
|
| 44 |
+
# Handle question-answering
def answer_question(query):
    """Answer *query* by retrieving a supporting document and generating text.

    Args:
        query: The user's question string.

    Returns:
        The generated answer, decoded to plain text.
    """
    # Fetch the closest indexed document to ground the generation.
    context_doc = retrieve_docs(query)

    # Encode question + retrieved context as one padded/truncated batch.
    encoded = tokenizer(
        query,
        context_doc,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Run the seq2seq generator and decode the first (only) sequence.
    output_ids = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 58 |
+
|
| 59 |
+
# Streamlit interface for user input
st.title("Multilingual RAG Translator/Answer Bot")
st.write("Ask a question in your preferred language (Urdu, French, Hindi)")

# Only run the (slow) retrieval + generation pipeline once the user has
# actually typed something.
if user_question := st.text_input("Enter your question:"):
    answer = answer_question(user_question)
    st.write(f"Answer: {answer}")
|