Spaces:

rahideer
/

MultilingualRAGApplication

Sleeping

App Files Community

rahideer committed on Apr 13, 2025

Commit

2c19e76

verified ·

1 Parent(s): 97c48f3

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -60

app.py CHANGED Viewed

@@ -1,61 +1,35 @@
 import streamlit as st
-from datasets import load_dataset
-from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
-# Load a multilingual dataset (xnli or tydi_qa)
-def load_data():
-    try:
-        # Use a specific version of the dataset
-        dataset = load_dataset("xnli", "all_languages", split="validation")  # Using a direct name instead of a wildcard pattern
-        st.write(f"Loaded {len(dataset)} examples from the 'validation' split.")
-        return dataset
-    except Exception as e:
-        st.write(f"Error loading 'xnli' dataset: {e}")
-        return None
-# Initialize RAG model components
-def initialize_rag():
-    try:
-        # Initialize tokenizer and retriever
-        tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
-        retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="compressed", passages_path="./path_to_data")
-        model = RagSequenceForGeneration.from_pretrained("facebook/rag-token-nq")
-        return tokenizer, retriever, model
-    except Exception as e:
-        st.write(f"Error initializing RAG components: {e}")
-        return None, None, None
-# Main function to run the app
-def main():
-    st.title("Multilingual RAG Translator/Answer Bot")
-    # Load the dataset
-    dataset = load_data()
-    if dataset is None:
-        st.write("Dataset could not be loaded.")
-        return
-    # Initialize RAG model components
-    tokenizer, retriever, model = initialize_rag()
-    if tokenizer is None or retriever is None or model is None:
-        st.write("RAG components could not be initialized.")
-        return
-    # UI to input a query
-    query = st.text_input("Enter your question in Urdu, Hindi, or French:")
-    if query:
-        # Tokenize the input query
-        inputs = tokenizer(query, return_tensors="pt")
-        # Retrieve relevant documents
-        retrieved_docs = retriever.retrieve(query)
-        # Generate an answer using the model
-        generated = model.generate(input_ids=inputs['input_ids'], context_input_ids=retrieved_docs['input_ids'])
-        answer = tokenizer.decode(generated[0], skip_special_tokens=True)
-        st.write("Answer:", answer)
-# Run the Streamlit app
-if __name__ == "__main__":
-    main()

 import streamlit as st
+from transformers import pipeline
+import groq
+# Initialize Groq API
+groq_client = groq.Client()
+# Initialize the zero-shot classification pipeline from Hugging Face
+classifier = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
+# Function to perform zero-shot classification
+def classify_text(sequence, candidate_labels):
+    result = classifier(sequence, candidate_labels)
+    return result
+# Streamlit UI elements
+st.title("Zero-Shot Text Classification with XLM-RoBERTa")
+st.markdown("Enter a text and select candidate labels for classification.")
+# Text input from the user
+sequence = st.text_area("Enter text to classify", "", height=150)
+# Candidate labels
+candidate_labels = st.text_input("Enter candidate labels (comma separated)", "politics, health, education")
+candidate_labels = [label.strip() for label in candidate_labels.split(",")]
+# When the classify button is pressed
+if st.button("Classify Text"):
+    if sequence:
+        result = classify_text(sequence, candidate_labels)
+        st.write("Classification Results:")
+        st.write(f"Labels: {result['labels']}")
+        st.write(f"Scores: {result['scores']}")
+    else:
+        st.error("Please enter text to classify.")