fahadkhan93 committed on
Commit
38840d9
·
verified ·
1 Parent(s): b5f4436

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -23
app.py CHANGED
@@ -1,42 +1,35 @@
1
  import gradio as gr
2
  from datasets import load_dataset
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  import numpy as np
7
 
8
  # Load dataset
9
  ds = load_dataset("STEM-AI-mtl/Electrical-engineering", split="train")
10
- docs = [q + " " + a for q, a in zip(ds["input"], ds["output"])]
 
 
11
  vectorizer = TfidfVectorizer().fit(docs)
12
  tfidf_matrix = vectorizer.transform(docs)
13
 
14
- # Load LLM
15
- model_name = "STEM-AI-mtl/phi-2-electrical-engineering"
16
- tokenizer = AutoTokenizer.from_pretrained(model_name)
17
- model = AutoModelForCausalLM.from_pretrained(model_name)
18
- qa_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=512)
19
-
20
  # Retrieval function
21
- def retrieve(user_q, top_k=1):
22
  vec = vectorizer.transform([user_q])
23
  sims = cosine_similarity(vec, tfidf_matrix).flatten()
24
- best = np.argmax(sims)
25
- return ds["input"][best], ds["output"][best]
26
 
27
- # Full QA function
28
- def answer(user_q):
29
- q, a = retrieve(user_q)
30
- prompt = f"Question: {user_q}\nRelevant Q: {q}\nRelevant A: {a}\nAnswer:"
31
- gen = qa_pipeline(prompt)
32
- return gen[0]["generated_text"].split("Answer:")[-1].strip()
33
-
34
- # Gradio UI
35
  with gr.Blocks() as demo:
36
- gr.Markdown("# 🛠️ EE Smart Q&A Chatbot")
37
- user_q = gr.Textbox(label="Ask your electronics question", lines=2)
38
- out = gr.Markdown()
39
- user_q.submit(answer, inputs=user_q, outputs=out)
 
 
 
 
 
40
 
41
  if __name__ == "__main__":
42
  demo.launch()
 
1
  import gradio as gr
2
  from datasets import load_dataset
 
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
6
 
# Load the EE Q&A dataset once at startup; each row has an "input"
# (question) column and an "output" (answer) column.
ds = load_dataset("STEM-AI-mtl/Electrical-engineering", split="train")

# Combine question and answer text so retrieval can match terms that
# appear in either field.
docs = [inp + " " + out for inp, out in zip(ds["input"], ds["output"])]

# fit_transform learns the vocabulary and vectorizes the corpus in a
# single pass, instead of fitting and then transforming separately.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(docs)
14
 
 
 
 
 
 
 
15
  # Retrieval function
16
+ def retrieve_answer(user_q, top_k=1):
17
  vec = vectorizer.transform([user_q])
18
  sims = cosine_similarity(vec, tfidf_matrix).flatten()
19
+ idxs = np.argsort(-sims)[:top_k]
20
+ return "\n\n".join([f"**Q:** {ds['input'][i]}\n**A:** {ds['output'][i]}" for i in idxs])
21
 
22
+ # Gradio app
 
 
 
 
 
 
 
23
  with gr.Blocks() as demo:
24
+ gr.Markdown("# 🤖 Electronics Engineering Q&A Chatbot")
25
+ gr.Markdown("Ask any electronics-related question and get an AI-assisted answer based on a curated dataset.")
26
+
27
+ with gr.Row():
28
+ user_q = gr.Textbox(label="Your Question", lines=2, placeholder="e.g. What is the purpose of a Zener diode?")
29
+ answer_box = gr.Markdown(label="Answer")
30
+ submit_btn = gr.Button("Get Answer")
31
+
32
+ submit_btn.click(fn=retrieve_answer, inputs=user_q, outputs=answer_box)
33
 
34
  if __name__ == "__main__":
35
  demo.launch()