# Spaces: Configuration error
| import gradio as gr | |
| import torch | |
| import torch.nn.functional as F | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
# =====================================================
# TOP-K HELPER
# =====================================================
def get_top_k(similarity_scores, texts, k=5):
    """Return the ``k`` highest-scoring texts, best match first.

    Args:
        similarity_scores: 1-D array-like of scores, where entry ``i`` is
            the score of ``texts[i]``.
        texts: Sequence indexable by integer position, aligned with
            ``similarity_scores``.
        k: Maximum number of results. Values ``<= 0`` yield an empty list;
            values larger than the number of scores return every entry.

    Returns:
        A list of ``(text, score)`` tuples in descending score order, with
        each score converted to a builtin ``float``.
    """
    if k <= 0:
        # Slicing with ``[-0:]`` would select the whole array rather than
        # nothing, so a non-positive k is handled explicitly.
        return []

    scores = np.asarray(similarity_scores)
    # Ascending argsort, reversed so the best score leads, then cut to k.
    ranked = scores.argsort()[::-1][:k]
    return [(texts[i], float(scores[i])) for i in ranked]
def format_results(results):
    """Render ranked ``(text, score)`` pairs as a numbered list.

    Each entry shows its 1-based rank followed by the first 200 characters
    of the text and a trailing ``...``; the score is not displayed. Entries
    are separated by a blank line.
    """
    numbered = enumerate(results, start=1)
    entries = (
        f"{rank}. {snippet[:200]}..."
        for rank, (snippet, _score) in numbered
    )
    return "\n\n".join(entries)
# =====================================================
# MAIN PIPELINE
# =====================================================
def pipeline(text):
    """Classify the sentiment of ``text`` and retrieve similar documents.

    Three approaches are run side by side, labelled TF-IDF, BERT and
    DistilBERT. Each one yields a sentiment prediction and a top-5 list of
    the most similar entries in ``documents`` by cosine similarity.

    Relies on objects defined outside this block: ``preprocess``,
    ``vectorizer``, ``baseline_clf``, ``bert_encoder``, ``advanced_clf``,
    ``tokenizer``, ``transformer_model``, ``get_distilbert_embedding``,
    ``documents``, ``tfidf_matrix``, ``doc_embeddings`` and
    ``distilbert_doc_embeddings``.

    Args:
        text: Raw review or query string entered by the user.

    Returns:
        A ``(classification_output, retrieval_output)`` tuple of display
        strings. For empty or whitespace-only input, a short prompt and an
        empty string are returned without running any model.
    """
    if not text or not text.strip():
        # An empty query carries no signal to classify or match against,
        # so skip the model calls entirely.
        return "Please enter some text to analyse.", ""

    processed = preprocess(text)
    labels = {0: "Negative", 1: "Positive"}

    # Build each query representation once; the same vector feeds both the
    # classifier and the similarity search for that model.
    # NOTE(review): assumes transform/encode are deterministic and that
    # predict does not mutate its input - confirm for the loaded models.
    tfidf_vec = vectorizer.transform([processed])
    bert_emb = bert_encoder.encode([text])

    # =========================
    # CLASSIFICATION
    # =========================
    tfidf_pred = baseline_clf.predict(tfidf_vec)[0]
    bert_pred = advanced_clf.predict(bert_emb)[0]

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = transformer_model(**inputs).logits
    pred = torch.argmax(logits, dim=1).item()
    # The largest softmax probability belongs to the predicted class.
    confidence = F.softmax(logits, dim=1).max().item()
    distilbert_label = labels[pred]

    # =========================
    # TOP-K RETRIEVAL
    # =========================
    tfidf_sim = cosine_similarity(tfidf_vec, tfidf_matrix).flatten()
    tfidf_topk = get_top_k(tfidf_sim, documents)

    bert_sim = cosine_similarity(bert_emb, doc_embeddings).flatten()
    bert_topk = get_top_k(bert_sim, documents)

    distil_emb = get_distilbert_embedding(text)
    distil_sim = cosine_similarity(distil_emb, distilbert_doc_embeddings).flatten()
    distil_topk = get_top_k(distil_sim, documents)

    # =========================
    # OUTPUT
    # =========================
    classification_output = f"""
TF-IDF Prediction: {labels[tfidf_pred]}
BERT Prediction: {labels[bert_pred]}
DistilBERT Prediction: {distilbert_label} ({confidence*100:.2f}%)
"""
    retrieval_output = f"""
🔹 TF-IDF TOP-5
{format_results(tfidf_topk)}
----------------------------
🔹 BERT TOP-5
{format_results(bert_topk)}
----------------------------
🔹 DistilBERT TOP-5
{format_results(distil_topk)}
"""
    return classification_output, retrieval_output
# =====================================================
# GRADIO UI
# =====================================================
# Single free-text input for the review or search query.
_query_box = gr.Textbox(
    label="Enter Review / Query",
    lines=3,
    placeholder="late delivery problem...",
)

# One output per string returned by ``pipeline``: the classification summary
# first, then the retrieval listing.
_result_boxes = [
    gr.Textbox(label="🔹 Sentiment Classification"),
    gr.Textbox(label="🔹 Top-5 Retrieval Results"),
]

# Clickable sample queries shown with the form.
_example_queries = [
    ["late delivery problem"],
    ["refund not given"],
    ["bad customer service"],
    ["product arrived damaged"],
]

_description = """
TF-IDF + BERT + DistilBERT comparison system.
Shows both sentiment classification and semantic retrieval.
"""

demo = gr.Interface(
    fn=pipeline,
    inputs=_query_box,
    outputs=_result_boxes,
    title="NLP Project: Classification + Semantic Search",
    description=_description,
    examples=_example_queries,
)

# Start the web app as soon as the module is executed.
demo.launch()