Upload 4 files
- app.py +61 -0
- requirements.txt +8 -0
- scraper.py +75 -0
- utils.py +21 -0
app.py
ADDED
@@ -0,0 +1,61 @@
import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel

from scraper import scrape_tariffs

# Load a pre-trained transformer model for embeddings directly
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

# Initialize Streamlit components
st.title("Electricity Bill Estimator")
st.sidebar.header("User Input")

tariff_urls = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
    "FESCO": "https://fesco.com.pk/tariff",
    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
    "TESCO": "https://tesco.gov.pk/index.php/electricity-traiff",
}

def show_tariff_input():
    # Display the tariff categories found in the scraped data
    tariff_data = pd.read_csv("data/tariffs.csv")
    tariff_types = tariff_data["category"].unique()
    tariff_choice = st.selectbox("Select your tariff category:", tariff_types)
    st.write(f"Selected Tariff: {tariff_choice}")

def scrape_data():
    # Scrape tariff data from the provided URLs
    scrape_tariffs(list(tariff_urls.values()))

# Streamlit actions
if st.sidebar.button("Scrape Data"):
    scrape_data()

# User inputs for appliance load and usage time (replace placeholders as needed)
appliance_load = st.number_input("Enter appliance load in watts", min_value=10, max_value=5000, value=1000)
usage_time = st.number_input("Enter usage time (in hours)", min_value=1, max_value=24, value=5)

# Placeholder bill calculation: convert watts to kilowatts so the usage is in
# kWh, then apply a flat placeholder rate (swap in the scraped tariff rate)
if appliance_load and usage_time:
    bill_amount = (appliance_load / 1000) * usage_time * 0.25
    st.write(f"Your electricity bill: {bill_amount:.2f} PKR")

# Example of using Hugging Face's transformers directly to encode queries
user_query = st.text_input("Ask about your tariff or appliance:")
if user_query:
    # Tokenize the query and compute a mean-pooled sentence embedding,
    # masking out padding tokens
    inputs = tokenizer(user_query, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    mask = inputs["attention_mask"].unsqueeze(-1)
    embeddings = ((outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)).squeeze().numpy()

    st.write(f"Query embedding (for further processing): {embeddings}")
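
Note that utils.py (further down in this diff) defines FAISS helpers, but app.py never calls them, so the query embedding is only printed. Below is a minimal sketch of how the pieces could be wired together, assuming the tariff categories from data/tariffs.csv are embedded with the same model; the embed_texts helper is hypothetical and shown only for illustration:

# Hypothetical glue code, not part of this commit: find the tariff
# category closest to the user's query using the utils.py helpers.
from utils import generate_faiss_index, query_faiss_index

def embed_texts(texts):
    # Reuses the tokenizer/model loaded at the top of app.py
    enc = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        out = model(**enc)
    m = enc["attention_mask"].unsqueeze(-1)
    return ((out.last_hidden_state * m).sum(dim=1) / m.sum(dim=1)).numpy()

categories = pd.read_csv("data/tariffs.csv")["category"].tolist()
index = generate_faiss_index(embed_texts(categories))
indices, _ = query_faiss_index(embed_texts([user_query])[0], index)
st.write(f"Closest tariff category: {categories[indices[0][0]]}")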
requirements.txt
ADDED
@@ -0,0 +1,8 @@
streamlit==1.24.0
beautifulsoup4==4.12.2
requests==2.31.0
pandas==1.5.3
torch==2.1.0             # PyTorch, required for computing embeddings
transformers==4.34.0     # Hugging Face Transformers for loading the model
huggingface_hub>=0.16.4  # Hugging Face Hub client used when downloading the model
faiss-cpu==1.7.4         # Optional, for similarity search over embeddings
scraper.py
ADDED
@@ -0,0 +1,75 @@
import os
import time
from random import randint

import requests
from bs4 import BeautifulSoup
import pandas as pd


def parse_rows(response, data):
    # Extract (category, rate) pairs from every table row on the page
    soup = BeautifulSoup(response.content, "html.parser")
    for row in soup.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) >= 2:
            try:
                data.append({
                    "category": cells[0].text.strip(),
                    "rate": float(cells[1].text.strip().replace(",", "")),
                })
            except ValueError:
                # Skip rows whose second cell is not a numeric rate
                continue


def scrape_tariffs(urls):
    data = []

    # Ensure the 'data' directory exists before saving the CSV
    os.makedirs("data", exist_ok=True)

    for url in urls:
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # Raise for bad status codes (4xx, 5xx)
            parse_rows(response, data)

        except requests.exceptions.RequestException as e:
            print(f"Error fetching data from {url}: {e}")
            print("Retrying...")

            # Retry logic in case of failure (max 3 retries with random delay)
            retries = 3
            while retries > 0:
                time.sleep(randint(1, 3))  # Sleep for a random time before retrying
                retries -= 1
                try:
                    response = requests.get(url, timeout=10)
                    response.raise_for_status()
                    parse_rows(response, data)
                    break
                except requests.exceptions.RequestException as retry_error:
                    print(f"Retry failed: {retry_error}")
                    continue

        # Sleep between requests to avoid hitting the servers too quickly
        time.sleep(randint(2, 5))

    if data:
        df = pd.DataFrame(data)
        # Save the scraped data to the 'data' directory
        df.to_csv("data/tariffs.csv", index=False)
        print("Tariff data saved successfully.")
    else:
        print("No tariff data found.")
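
For a quick standalone check of the scraper outside Streamlit, a small sketch (not part of the commit) could be appended to scraper.py, using one of the URLs from app.py:

if __name__ == "__main__":
    # Fetch a single DISCO tariff page and write data/tariffs.csv
    scrape_tariffs(["https://fesco.com.pk/tariff"])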
utils.py
ADDED
@@ -0,0 +1,21 @@
import faiss
import numpy as np

def generate_faiss_index(embeddings):
    # FAISS expects float32 input
    embeddings = np.array(embeddings, dtype=np.float32)
    # Derive the dimension from the data; all-MiniLM-L6-v2 produces
    # 384-dimensional embeddings (not 768)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index

def load_faiss_index_to_gpu(index):
    # Move the index to the GPU; this requires the faiss-gpu package
    # (the faiss-cpu pin in requirements.txt only covers the CPU path)
    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # assumes GPU 0 is available
    return gpu_index

def query_faiss_index(query_embedding, index):
    # FAISS expects a 2D float32 array of query vectors
    query_embedding = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
    distances, indices = index.search(query_embedding, 1)  # nearest neighbour
    return indices, distances
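
A small usage sketch for these helpers with stand-in data (illustrative only; real embeddings would come from the model loaded in app.py):

import numpy as np
from utils import generate_faiss_index, query_faiss_index

# Ten random 384-dim vectors standing in for real sentence embeddings
vectors = np.random.rand(10, 384).astype(np.float32)
index = generate_faiss_index(vectors)
indices, distances = query_faiss_index(vectors[0], index)
print(indices, distances)  # the nearest neighbour of vectors[0] is itself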