Spaces:

gholap310
/

electricity-usage-analytics

Sleeping

App Files Files Community

gholap310 committed on Aug 5, 2025

Commit

5198010

verified ·

1 Parent(s): c13205b

Upload app.py

Browse files

Files changed (1) hide show

app.py +46 -78

app.py CHANGED Viewed

@@ -1,79 +1,47 @@
-{\rtf1\ansi\ansicpg1252\cocoartf2822
-\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fmodern\fcharset0 Courier;}
-{\colortbl;\red255\green255\blue255;\red131\green0\blue165;\red245\green245\blue245;\red0\green0\blue0;
-\red15\green112\blue1;\red86\green65\blue25;\red144\green1\blue18;\red0\green0\blue255;\red0\green0\blue109;
-\red19\green85\blue52;}
-{\*\expandedcolortbl;;\cssrgb\c59216\c13725\c70588;\cssrgb\c96863\c96863\c96863;\cssrgb\c0\c0\c0;
-\cssrgb\c0\c50196\c0;\cssrgb\c41569\c32157\c12941;\cssrgb\c63922\c8235\c8235;\cssrgb\c0\c0\c100000;\cssrgb\c0\c6275\c50196;
-\cssrgb\c6667\c40000\c26667;}
-\margl1440\margr1440\vieww11520\viewh8400\viewkind0
-\deftab720
-\pard\pardeftab720\partightenfactor0
-\f0\fs28 \cf2 \cb3 \expnd0\expndtw0\kerning0
-\outl0\strokewidth0 \strokec2 import\cf0 \strokec4  json\cb1 \
-\cf2 \cb3 \strokec2 from\cf0 \strokec4  sklearn.feature_extraction.text \cf2 \strokec2 import\cf0 \strokec4  TfidfVectorizer\cb1 \
-\cf2 \cb3 \strokec2 from\cf0 \strokec4  sklearn.metrics.pairwise \cf2 \strokec2 import\cf0 \strokec4  cosine_similarity\cb1 \
-\cf2 \cb3 \strokec2 from\cf0 \strokec4  transformers \cf2 \strokec2 import\cf0 \strokec4  pipeline\cb1 \
-\cf2 \cb3 \strokec2 import\cf0 \strokec4  gradio \cf2 \strokec2 as\cf0 \strokec4  gr\cb1 \
-\
-\pard\pardeftab720\partightenfactor0
-\cf5 \cb3 \strokec5 # Load your natural-language corpus\cf0 \cb1 \strokec4 \
-\pard\pardeftab720\partightenfactor0
-\cf2 \cb3 \strokec2 with\cf0 \strokec4  \cf6 \strokec6 open\cf0 \strokec4 (\cf7 \strokec7 "electricity_corpus.json"\cf0 \strokec4 , \cf7 \strokec7 "r"\cf0 \strokec4 ) \cf2 \strokec2 as\cf0 \strokec4  f:\cb1 \
-\pard\pardeftab720\partightenfactor0
-\cf0 \cb3     corpus = json.load(f)\cb1 \
-\
-\pard\pardeftab720\partightenfactor0
-\cf5 \cb3 \strokec5 # Build TF-IDF index\cf0 \cb1 \strokec4 \
-\pard\pardeftab720\partightenfactor0
-\cf0 \cb3 vectorizer = TfidfVectorizer()\cb1 \
-\cb3 tfidf_matrix = vectorizer.fit_transform(corpus)\cb1 \
-\
-\pard\pardeftab720\partightenfactor0
-\cf5 \cb3 \strokec5 # Load the QA model\cf0 \cb1 \strokec4 \
-\pard\pardeftab720\partightenfactor0
-\cf0 \cb3 qa_pipeline = pipeline(\cf7 \strokec7 "question-answering"\cf0 \strokec4 , model=\cf7 \strokec7 "distilbert-base-cased-distilled-squad"\cf0 \strokec4 )\cb1 \
-\
-\pard\pardeftab720\partightenfactor0
-\cf5 \cb3 \strokec5 # Function to retrieve top matching rows\cf0 \cb1 \strokec4 \
-\pard\pardeftab720\partightenfactor0
-\cf8 \cb3 \strokec8 def\cf0 \strokec4  \cf6 \strokec6 get_top_contexts\cf0 \strokec4 (\cf9 \strokec9 question\cf0 \strokec4 , \cf9 \strokec9 top_k\cf0 \strokec4 =\cf10 \strokec10 3\cf0 \strokec4 ):\cb1 \
-\pard\pardeftab720\partightenfactor0
-\cf0 \cb3     question_vec = vectorizer.transform([question])\cb1 \
-\cb3     similarities = cosine_similarity(question_vec, tfidf_matrix).flatten()\cb1 \
-\cb3     top_indices = similarities.argsort()[-top_k:][::\cf10 \strokec10 -1\cf0 \strokec4 ]\cb1 \
-\cb3     \cf2 \strokec2 return\cf0 \strokec4  [corpus[i] \cf2 \strokec2 for\cf0 \strokec4  i \cf8 \strokec8 in\cf0 \strokec4  top_indices]\cb1 \
-\
-\pard\pardeftab720\partightenfactor0
-\cf5 \cb3 \strokec5 # Main logic to get answer\cf0 \cb1 \strokec4 \
-\pard\pardeftab720\partightenfactor0
-\cf8 \cb3 \strokec8 def\cf0 \strokec4  \cf6 \strokec6 answer_question\cf0 \strokec4 (\cf9 \strokec9 question\cf0 \strokec4 , \cf9 \strokec9 top_k\cf0 \strokec4 =\cf10 \strokec10 3\cf0 \strokec4 ):\cb1 \
-\pard\pardeftab720\partightenfactor0
-\cf0 \cb3     \cf2 \strokec2 if\cf0 \strokec4  \cf8 \strokec8 not\cf0 \strokec4  question.strip():\cb1 \
-\cb3         \cf2 \strokec2 return\cf0 \strokec4  \cf7 \strokec7 "Please enter a valid question."\cf0 \cb1 \strokec4 \
-\
-\cb3     contexts = get_top_contexts(question, top_k)\cb1 \
-\cb3     combined_context = \cf7 \strokec7 " "\cf0 \strokec4 .join(contexts)[:\cf10 \strokec10 4096\cf0 \strokec4 ]  \cf5 \strokec5 # truncate to model max input\cf0 \cb1 \strokec4 \
-\cb3     result = qa_pipeline(question=question, context=combined_context)\cb1 \
-\cb3     \cf2 \strokec2 return\cf0 \strokec4  result[\cf7 \strokec7 "answer"\cf0 \strokec4 ]\cb1 \
-\
-\pard\pardeftab720\partightenfactor0
-\cf5 \cb3 \strokec5 # Gradio interface\cf0 \cb1 \strokec4 \
-\pard\pardeftab720\partightenfactor0
-\cf0 \cb3 iface = gr.Interface(\cb1 \
-\cb3     fn=answer_question,\cb1 \
-\cb3     inputs=gr.Textbox(label=\cf7 \strokec7 "Ask your question about electricity usage..."\cf0 \strokec4 ),\cb1 \
-\cb3     outputs=gr.Textbox(label=\cf7 \strokec7 "Answer"\cf0 \strokec4 ),\cb1 \
-\cb3     title=\cf7 \strokec7 "\uc0\u55357 \u56588  Electricity Data Q&A"\cf0 \strokec4 ,\cb1 \
-\cb3     description=\cf7 \strokec7 "Ask questions like 'What was the price for residential in Texas in Jan 2001?' or 'Which state had highest revenue in Jan 2001?'"\cf0 \strokec4 ,\cb1 \
-\cb3 )\cb1 \
-\
-\pard\pardeftab720\partightenfactor0
-\cf5 \cb3 \strokec5 # Run the app\cf0 \cb1 \strokec4 \
-\pard\pardeftab720\partightenfactor0
-\cf2 \cb3 \strokec2 if\cf0 \strokec4  \cf9 \strokec9 __name__\cf0 \strokec4  == \cf7 \strokec7 "__main__"\cf0 \strokec4 :\cb1 \
-\pard\pardeftab720\partightenfactor0
-\cf0 \cb3     iface.launch()\cb1 \
-\
-}

+import json
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from transformers import pipeline
+import gradio as gr
+# Load the natural-language corpus that the retriever searches.
+# NOTE(review): assumes the JSON file holds a list of strings, since it is fed
+# straight to TfidfVectorizer.fit_transform below -- confirm against the data file.
+# The explicit encoding keeps decoding independent of the host's locale default.
+with open("electricity_corpus.json", "r", encoding="utf-8") as f:
+    corpus = json.load(f)
+# Build the TF-IDF index once at import time; each query is compared against it.
+vectorizer = TfidfVectorizer()
+tfidf_matrix = vectorizer.fit_transform(corpus)
+# Load the question-answering model that extracts the answer from the retrieved context.
+qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+# Function to retrieve top matching rows
+def get_top_contexts(question, top_k=3):
+    """Return up to ``top_k`` corpus entries ranked by TF-IDF cosine similarity.
+
+    Args:
+        question: Query text; it is vectorized with the module-level ``vectorizer``.
+        top_k: Maximum number of entries to return. Zero or a negative value
+            yields an empty list.
+
+    Returns:
+        A list of corpus entries, most similar first.
+    """
+    # A bare ``[-top_k:]`` slice selects the *whole* array when top_k == 0
+    # (because -0 == 0), so non-positive requests are handled explicitly.
+    if top_k <= 0:
+        return []
+    question_vec = vectorizer.transform([question])
+    similarities = cosine_similarity(question_vec, tfidf_matrix).flatten()
+    # argsort is ascending: take the last top_k indices, then reverse to best-first.
+    top_indices = similarities.argsort()[-top_k:][::-1]
+    return [corpus[i] for i in top_indices]
+# Main logic to get answer
+def answer_question(question, top_k=3):
+    """Answer *question* using the best-matching corpus entries as context.
+
+    Args:
+        question: The user's question text. ``None`` or blank text is rejected
+            with a prompt message instead of raising.
+        top_k: Number of corpus entries to retrieve and join into the context.
+
+    Returns:
+        The answer text extracted by the QA pipeline, or a short user-facing
+        message when the question is blank or no context could be assembled.
+    """
+    # Check for None before calling .strip(), which would raise AttributeError.
+    if not question or not question.strip():
+        return "Please enter a valid question."
+    contexts = get_top_contexts(question, top_k)
+    # This is a character cap, not a token limit.
+    # NOTE(review): the question-answering pipeline is documented to split long
+    # contexts into overlapping chunks itself, so this cap presumably only bounds
+    # inference cost -- confirm before changing it.
+    combined_context = " ".join(contexts)[:4096]
+    # The pipeline rejects an empty context, so answer gracefully instead.
+    if not combined_context.strip():
+        return "No relevant information was found for that question."
+    result = qa_pipeline(question=question, context=combined_context)
+    return result["answer"]
+# Gradio interface: one free-text question in, one extracted answer out.
+_question_box = gr.Textbox(label="Ask your question about electricity usage...")
+_answer_box = gr.Textbox(label="Answer")
+iface = gr.Interface(
+    fn=answer_question,
+    inputs=_question_box,
+    outputs=_answer_box,
+    title="🔌 Electricity Data Q&A",
+    description=(
+        "Ask questions like 'What was the price for residential in Texas in Jan 2001?' "
+        "or 'Which state had highest revenue in Jan 2001?'"
+    ),
+)
+# Launch the interface only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    iface.launch()