Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,42 +1,35 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from datasets import load_dataset
|
| 3 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
# Load dataset
|
| 9 |
ds = load_dataset("STEM-AI-mtl/Electrical-engineering", split="train")
|
| 10 |
-
|
|
|
|
|
|
|
| 11 |
vectorizer = TfidfVectorizer().fit(docs)
|
| 12 |
tfidf_matrix = vectorizer.transform(docs)
|
| 13 |
|
| 14 |
-
# Load LLM
|
| 15 |
-
model_name = "STEM-AI-mtl/phi-2-electrical-engineering"
|
| 16 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 17 |
-
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 18 |
-
qa_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=512)
|
| 19 |
-
|
| 20 |
# Retrieval function
|
| 21 |
-
def
|
| 22 |
vec = vectorizer.transform([user_q])
|
| 23 |
sims = cosine_similarity(vec, tfidf_matrix).flatten()
|
| 24 |
-
|
| 25 |
-
return
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
def answer(user_q):
|
| 29 |
-
q, a = retrieve(user_q)
|
| 30 |
-
prompt = f"Question: {user_q}\nRelevant Q: {q}\nRelevant A: {a}\nAnswer:"
|
| 31 |
-
gen = qa_pipeline(prompt)
|
| 32 |
-
return gen[0]["generated_text"].split("Answer:")[-1].strip()
|
| 33 |
-
|
| 34 |
-
# Gradio UI
|
| 35 |
with gr.Blocks() as demo:
|
| 36 |
-
gr.Markdown("#
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
if __name__ == "__main__":
|
| 42 |
demo.launch()
|
|
|
|
import gradio as gr
from datasets import load_dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the curated electrical-engineering Q&A dataset.
ds = load_dataset("STEM-AI-mtl/Electrical-engineering", split="train")

# Build one retrieval document per example by joining the question
# ("input") and its answer ("output") so either side can match a query.
docs = [f"{question} {answer}" for question, answer in zip(ds["input"], ds["output"])]

# Fit the TF-IDF vocabulary and vectorize the corpus in one pass.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(docs)
# Retrieval function
def retrieve_answer(user_q, top_k=1):
    """Return the top_k most similar dataset Q/A pairs for *user_q*.

    Parameters
    ----------
    user_q : str
        The user's question (free text).
    top_k : int, optional
        Number of best-matching pairs to return (default 1).

    Returns
    -------
    str
        Markdown-formatted ``**Q:** ... **A:** ...`` pairs, or a short
        message when the query is empty or nothing in the corpus matches.
    """
    # Guard: an empty/whitespace query vectorizes to all zeros and would
    # otherwise "match" an arbitrary document.
    if not user_q or not user_q.strip():
        return "Please enter a question."
    vec = vectorizer.transform([user_q])
    sims = cosine_similarity(vec, tfidf_matrix).flatten()
    # Indices of the top_k highest-similarity documents, best first.
    idxs = np.argsort(-sims)[:top_k]
    # Drop zero-similarity hits so unrelated questions are not presented
    # as confident answers.
    idxs = [i for i in idxs if sims[i] > 0]
    if not idxs:
        return "No relevant answer found in the dataset."
    return "\n\n".join(
        f"**Q:** {ds['input'][i]}\n**A:** {ds['output'][i]}" for i in idxs
    )
# Gradio app: minimal retrieval-based Q&A front end over the TF-IDF index.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Electronics Engineering Q&A Chatbot")
    gr.Markdown("Ask any electronics-related question and get an AI-assisted answer based on a curated dataset.")

    with gr.Row():
        question_input = gr.Textbox(
            label="Your Question",
            lines=2,
            placeholder="e.g. What is the purpose of a Zener diode?",
        )
    result_markdown = gr.Markdown(label="Answer")
    ask_button = gr.Button("Get Answer")

    # Wire the button to the retrieval function; the textbox value is the
    # sole input, the Markdown component renders the returned string.
    ask_button.click(fn=retrieve_answer, inputs=question_input, outputs=result_markdown)

if __name__ == "__main__":
    demo.launch()