Engineer786 committed on
Commit
92ab38b
·
verified ·
1 Parent(s): f844e77

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +61 -0
  2. requirements.txt +8 -0
  3. scraper.py +75 -0
  4. utils.py +21 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import requests
import streamlit as st
import pandas as pd
from scraper import scrape_tariffs
from transformers import AutoTokenizer, AutoModel
import torch

# Sentence-embedding model used further down to encode free-text user queries.
_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
model = AutoModel.from_pretrained(_MODEL_NAME)

# Streamlit page scaffolding.
st.title("Electricity Bill Estimator")
st.sidebar.header("User Input")

# Official tariff pages, one per Pakistani distribution company (DISCO).
# Insertion order is preserved and determines the scraping order.
tariff_urls = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
    "FESCO": "https://fesco.com.pk/tariff",
    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
    "TESCO": "https://tesco.gov.pk/index.php/electricity-traiff"
}
28
def show_tariff_input():
    """Render a tariff-category selector backed by the scraped tariff CSV.

    Reads ``data/tariffs.csv`` (produced by :func:`scrape_data`), offers its
    unique ``category`` values in a selectbox, and echoes the chosen one.

    BUG FIX: previously this crashed with ``FileNotFoundError`` on a fresh
    deployment, because the CSV only exists after "Scrape Data" has been
    pressed. Now it shows a warning instead.
    """
    try:
        tariff_data = pd.read_csv("data/tariffs.csv")
    except FileNotFoundError:
        st.warning("Tariff data not available yet — click 'Scrape Data' in the sidebar first.")
        return
    tariff_types = tariff_data["category"].unique()
    tariff_choice = st.selectbox("Select your tariff category:", tariff_types)
    st.write(f"Selected Tariff: {tariff_choice}")
34
+
35
def scrape_data():
    """Scrape the tariff tables of every configured DISCO URL."""
    urls = list(tariff_urls.values())
    scrape_tariffs(urls)
38
+
39
# Streamlit actions
if st.sidebar.button("Scrape Data"):
    scrape_data()

# User inputs: appliance load (watts) and usage time (hours).
appliance_load = st.number_input("Enter appliance load in watts", min_value=10, max_value=5000, value=1000)
usage_time = st.number_input("Enter usage time (in hours)", min_value=1, max_value=24, value=5)

# Placeholder bill estimate until the scraped tariff rates are wired in.
# BUG FIX: the load is entered in *watts*, so watt-hours must be divided by
# 1000 to get kWh before applying the per-kWh rate; the original multiplied
# watts directly by the rate, inflating the bill 1000x.
RATE_PKR_PER_KWH = 0.25  # placeholder — replace with the scraped tariff rate
if appliance_load and usage_time:
    energy_kwh = appliance_load * usage_time / 1000
    bill_amount = energy_kwh * RATE_PKR_PER_KWH
    st.write(f"Your electricity bill: {bill_amount} PKR")

# Encode a free-text user query with the MiniLM model loaded above.
user_query = st.text_input("Ask about your tariff or appliance:")
if user_query:
    # Tokenize, run the model, and mean-pool the token embeddings into a
    # single sentence vector (no gradients needed for inference).
    inputs = tokenizer(user_query, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()

    st.write(f"Query embedding (for further processing): {embeddings}")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.24.0
2
+ beautifulsoup4==4.12.2
3
+ requests==2.31.0
4
+ pandas==1.5.3
5
+ torch==2.1.0 # PyTorch required for embeddings
6
+ transformers==4.34.0 # Hugging Face Transformers for loading the model
7
+ huggingface_hub>=0.16.4 # Handling Hugging Face API (already included for model loading)
8
+ faiss-cpu==1.7.4 # Optional, for similarity queries on embeddings if needed
scraper.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+ import time
6
+ from random import randint
7
+
8
+ def scrape_tariffs(urls):
9
+ data = []
10
+
11
+ # Ensure the 'data' directory exists before saving the CSV
12
+ os.makedirs("data", exist_ok=True)
13
+
14
+ for url in urls:
15
+ try:
16
+ response = requests.get(url, timeout=10) # Added timeout
17
+ response.raise_for_status() # Raise exception for bad status codes (4xx, 5xx)
18
+
19
+ # Scrape data if the response is OK
20
+ if response.status_code == 200:
21
+ soup = BeautifulSoup(response.content, "html.parser")
22
+ rows = soup.find_all("tr")
23
+
24
+ for row in rows:
25
+ cells = row.find_all("td")
26
+ if len(cells) >= 2:
27
+ try:
28
+ data.append({
29
+ "category": cells[0].text.strip(),
30
+ "rate": float(cells[1].text.strip().replace(",", "")),
31
+ })
32
+ except ValueError:
33
+ continue
34
+
35
+ except requests.exceptions.RequestException as e:
36
+ print(f"Error fetching data from {url}: {e}")
37
+ print("Retrying...")
38
+
39
+ # Retry logic in case of failure (max 3 retries with random delay)
40
+ retries = 3
41
+ while retries > 0:
42
+ time.sleep(randint(1, 3)) # Sleep for a random time before retrying
43
+ retries -= 1
44
+ try:
45
+ response = requests.get(url, timeout=10)
46
+ response.raise_for_status()
47
+ if response.status_code == 200:
48
+ soup = BeautifulSoup(response.content, "html.parser")
49
+ rows = soup.find_all("tr")
50
+
51
+ for row in rows:
52
+ cells = row.find_all("td")
53
+ if len(cells) >= 2:
54
+ try:
55
+ data.append({
56
+ "category": cells[0].text.strip(),
57
+ "rate": float(cells[1].text.strip().replace(",", "")),
58
+ })
59
+ except ValueError:
60
+ continue
61
+ break
62
+ except requests.exceptions.RequestException:
63
+ print(f"Retry failed: {e}")
64
+ continue
65
+
66
+ # Sleep between requests to avoid hitting the servers too quickly
67
+ time.sleep(randint(2, 5))
68
+
69
+ if data:
70
+ df = pd.DataFrame(data)
71
+ # Save the scraped data to the 'data' directory
72
+ df.to_csv("data/tariffs.csv", index=False)
73
+ print("Tariff data saved successfully.")
74
+ else:
75
+ print("No tariff data found.")
utils.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import numpy as np
3
+
4
def generate_faiss_index(embeddings):
    """Build a flat L2 FAISS index over the given embedding vectors.

    BUG FIX: the dimensionality was hard-coded to 768, but the model app.py
    loads (sentence-transformers/all-MiniLM-L6-v2) produces 384-dim vectors,
    so ``index.add`` would reject them. The dimension is now inferred from
    the data, and a single 1-D vector is accepted as one row.

    Parameters
    ----------
    embeddings : array-like, shape (n, d) or (d,)
        Embedding vectors to index.

    Returns
    -------
    faiss.IndexFlatL2
        Index containing all rows of ``embeddings``.
    """
    # FAISS expects float32 input.
    embeddings = np.array(embeddings, dtype=np.float32)
    if embeddings.ndim == 1:
        # Lone vector: promote to a (1, d) matrix so .add() accepts it.
        embeddings = embeddings.reshape(1, -1)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index
10
+
11
def load_faiss_index_to_gpu(index):
    """Clone a CPU-resident FAISS index onto GPU 0 and return the GPU copy.

    NOTE(review): requirements.txt pins ``faiss-cpu``, which does not ship
    ``StandardGpuResources`` — confirm a GPU-enabled faiss build is installed
    before calling this.
    """
    gpu_resources = faiss.StandardGpuResources()
    return faiss.index_cpu_to_gpu(gpu_resources, 0, index)
16
+
17
def query_faiss_index(query_embedding, gpu_index, k=1):
    """Find the ``k`` nearest stored embeddings to ``query_embedding``.

    Generalized: the neighbour count was hard-coded to 1; ``k`` now defaults
    to 1 so existing callers are unaffected.

    Parameters
    ----------
    query_embedding : array-like, shape (d,)
        Query vector; cast to float32 and reshaped to the 2-D
        (n_queries, d) layout FAISS requires.
    gpu_index :
        FAISS index to search (CPU- or GPU-resident).
    k : int, default 1
        Number of nearest neighbours to return.

    Returns
    -------
    tuple of np.ndarray
        ``(indices, distances)``, each of shape (1, k). Note the order:
        indices first, preserving the original interface (FAISS itself
        returns distances first).
    """
    query_embedding = np.array(query_embedding, dtype=np.float32)
    distances, indices = gpu_index.search(query_embedding.reshape(1, -1), k)
    return indices, distances