Upload 4 files
- app.py +61 -0
- requirements.txt +8 -0
- scraper.py +75 -0
- utils.py +21 -0
app.py
ADDED
@@ -0,0 +1,61 @@
import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel

from scraper import scrape_tariffs

# Load a pre-trained transformer model for embeddings directly
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

# Initialize Streamlit components
st.title("Electricity Bill Estimator")
st.sidebar.header("User Input")

tariff_urls = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
    "FESCO": "https://fesco.com.pk/tariff",
    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
    "TESCO": "https://tesco.gov.pk/index.php/electricity-traiff",
}

def show_tariff_input():
    # Display the tariff categories found in the scraped data
    tariff_data = pd.read_csv("data/tariffs.csv")
    tariff_types = tariff_data["category"].unique()
    tariff_choice = st.selectbox("Select your tariff category:", tariff_types)
    st.write(f"Selected Tariff: {tariff_choice}")

def scrape_data():
    # Scrape tariff data from the provided URLs
    scrape_tariffs(list(tariff_urls.values()))

# Streamlit actions
if st.sidebar.button("Scrape Data"):
    scrape_data()

# User inputs for appliance load and usage time (replace placeholders as needed)
appliance_load = st.number_input("Enter appliance load in watts", min_value=10, max_value=5000, value=1000)
usage_time = st.number_input("Enter usage time (in hours)", min_value=1, max_value=24, value=5)

# Placeholder bill calculation: convert watts to kilowatts so the usage is in
# kWh, then apply a flat placeholder rate (swap in the scraped tariff rate)
if appliance_load and usage_time:
    bill_amount = (appliance_load / 1000) * usage_time * 0.25
    st.write(f"Your electricity bill: {bill_amount:.2f} PKR")

# Example of using Hugging Face's transformers directly to encode queries
user_query = st.text_input("Ask about your tariff or appliance:")
if user_query:
    # Tokenize the query and compute a mean-pooled sentence embedding,
    # masking out padding tokens
    inputs = tokenizer(user_query, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    mask = inputs["attention_mask"].unsqueeze(-1)
    embeddings = ((outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)).squeeze().numpy()

    st.write(f"Query embedding (for further processing): {embeddings}")
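
Note that utils.py (further down in this diff) defines FAISS helpers, but app.py never calls them, so the query embedding is only printed. Below is a minimal sketch of how the pieces could be wired together, assuming the tariff categories from data/tariffs.csv are embedded with the same model; the embed_texts helper is hypothetical and shown only for illustration:

# Hypothetical glue code, not part of this commit: find the tariff
# category closest to the user's query using the utils.py helpers.
from utils import generate_faiss_index, query_faiss_index

def embed_texts(texts):
    # Reuses the tokenizer/model loaded at the top of app.py
    enc = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        out = model(**enc)
    m = enc["attention_mask"].unsqueeze(-1)
    return ((out.last_hidden_state * m).sum(dim=1) / m.sum(dim=1)).numpy()

categories = pd.read_csv("data/tariffs.csv")["category"].tolist()
index = generate_faiss_index(embed_texts(categories))
indices, _ = query_faiss_index(embed_texts([user_query])[0], index)
st.write(f"Closest tariff category: {categories[indices[0][0]]}")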
requirements.txt
ADDED
@@ -0,0 +1,8 @@
streamlit==1.24.0
beautifulsoup4==4.12.2
requests==2.31.0
pandas==1.5.3
torch==2.1.0             # PyTorch, required for computing embeddings
transformers==4.34.0     # Hugging Face Transformers for loading the model
huggingface_hub>=0.16.4  # Hugging Face Hub client used when downloading the model
faiss-cpu==1.7.4         # Optional, for similarity search over embeddings
scraper.py
ADDED
@@ -0,0 +1,75 @@
import os
import time
from random import randint

import requests
from bs4 import BeautifulSoup
import pandas as pd


def parse_rows(response, data):
    # Extract (category, rate) pairs from every table row on the page
    soup = BeautifulSoup(response.content, "html.parser")
    for row in soup.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) >= 2:
            try:
                data.append({
                    "category": cells[0].text.strip(),
                    "rate": float(cells[1].text.strip().replace(",", "")),
                })
            except ValueError:
                # Skip rows whose second cell is not a numeric rate
                continue


def scrape_tariffs(urls):
    data = []

    # Ensure the 'data' directory exists before saving the CSV
    os.makedirs("data", exist_ok=True)

    for url in urls:
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # Raise for bad status codes (4xx, 5xx)
            parse_rows(response, data)

        except requests.exceptions.RequestException as e:
            print(f"Error fetching data from {url}: {e}")
            print("Retrying...")

            # Retry logic in case of failure (max 3 retries with random delay)
            retries = 3
            while retries > 0:
                time.sleep(randint(1, 3))  # Sleep for a random time before retrying
                retries -= 1
                try:
                    response = requests.get(url, timeout=10)
                    response.raise_for_status()
                    parse_rows(response, data)
                    break
                except requests.exceptions.RequestException as retry_error:
                    print(f"Retry failed: {retry_error}")
                    continue

        # Sleep between requests to avoid hitting the servers too quickly
        time.sleep(randint(2, 5))

    if data:
        df = pd.DataFrame(data)
        # Save the scraped data to the 'data' directory
        df.to_csv("data/tariffs.csv", index=False)
        print("Tariff data saved successfully.")
    else:
        print("No tariff data found.")
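
For a quick standalone check of the scraper outside Streamlit, a small sketch (not part of the commit) could be appended to scraper.py, using one of the URLs from app.py:

if __name__ == "__main__":
    # Fetch a single DISCO tariff page and write data/tariffs.csv
    scrape_tariffs(["https://fesco.com.pk/tariff"])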
utils.py
ADDED
@@ -0,0 +1,21 @@
import faiss
import numpy as np

def generate_faiss_index(embeddings):
    # FAISS expects float32 input
    embeddings = np.array(embeddings, dtype=np.float32)
    # Derive the dimension from the data; all-MiniLM-L6-v2 produces
    # 384-dimensional embeddings (not 768)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index

def load_faiss_index_to_gpu(index):
    # Move the index to the GPU; this requires the faiss-gpu package
    # (the faiss-cpu pin in requirements.txt only covers the CPU path)
    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # assumes GPU 0 is available
    return gpu_index

def query_faiss_index(query_embedding, index):
    # FAISS expects a 2D float32 array of query vectors
    query_embedding = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
    distances, indices = index.search(query_embedding, 1)  # nearest neighbour
    return indices, distances
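
A small usage sketch for these helpers with stand-in data (illustrative only; real embeddings would come from the model loaded in app.py):

import numpy as np
from utils import generate_faiss_index, query_faiss_index

# Ten random 384-dim vectors standing in for real sentence embeddings
vectors = np.random.rand(10, 384).astype(np.float32)
index = generate_faiss_index(vectors)
indices, distances = query_faiss_index(vectors[0], index)
print(indices, distances)  # the nearest neighbour of vectors[0] is itself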