# PakistanBill / app.py
# Author: Engineer786 — "Update app.py" (commit b5b64fb, verified)
import os
import streamlit as st
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq
# Fetch API key from environment variable (note: the variable is named
# 'GroqApi', not the conventional GROQ_API_KEY).
API_KEY = os.environ.get('GroqApi')

# Module-level RAG state, populated when the user processes a URL.
# (The original `global CHUNKS, INDEX, MODEL` statement was removed: `global`
# is a no-op at module scope — these assignments already create module globals.)
CHUNKS = None   # list[str] of word-window chunks, or None until processed
INDEX = None    # FAISS index over the chunk embeddings, or None
MODEL = None    # SentenceTransformer used for embedding, or None
# Function to scrape tariff data
def scrape_tariff_data(url, timeout=30):
    """Fetch *url* and return the text of all <p> elements, newline-joined.

    Args:
        url: Page to scrape.
        timeout: Seconds before the HTTP request is abandoned (the original
            call had no timeout and could hang the app indefinitely).

    Returns:
        A single string of stripped paragraph texts joined by "\n"
        (empty string if the page has no <p> elements).

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status (callers already wrap this in try/except).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    paragraphs = [p.text.strip() for p in soup.find_all('p')]
    return "\n".join(paragraphs)
# Function to chunk text into manageable sizes
def chunk_text(text, max_length=512):
    """Split *text* into consecutive chunks of at most *max_length* words.

    Words are whatever ``str.split()`` yields; each chunk is the words
    re-joined with single spaces. Returns an empty list for empty input.
    """
    words = text.split()
    return [
        " ".join(words[start:start + max_length])
        for start in range(0, len(words), max_length)
    ]
# Function to create embeddings and FAISS index
def create_faiss_index(chunks, model_name='all-MiniLM-L6-v2'):
    """Embed *chunks* and build an exact (flat) L2 FAISS index over them.

    Returns a ``(index, embeddings, model)`` triple so callers can reuse
    the same embedding model for queries later.
    """
    embedder = SentenceTransformer(model_name)
    vectors = embedder.encode(chunks)
    # IndexFlatL2 performs brute-force L2 search; dimension comes from the
    # embedding width.
    faiss_index = faiss.IndexFlatL2(vectors.shape[1])
    faiss_index.add(vectors)
    return faiss_index, vectors, embedder
# Function to search FAISS for relevant chunks
def search_faiss(query, index, chunks, model, top_k=5):
    """Return up to *top_k* chunks most similar to *query*.

    Args:
        query: Natural-language query string.
        index: FAISS index built over the embeddings of *chunks*.
        chunks: The text chunks the index was built from, in insertion order.
        model: Embedding model exposing ``encode(list[str])``.
        top_k: Maximum number of neighbours to retrieve.

    Returns:
        The matching chunk strings, nearest first.
    """
    query_embedding = model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    # FAISS pads the result with -1 when the index holds fewer than top_k
    # vectors. The previous guard (`i < len(chunks)`) let -1 through, and
    # negative indexing silently returned the LAST chunk; require 0 <= i.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
# Function to query the Groq API with augmented query
def query_llm(prompt, context):
    """Ask the Groq-hosted LLM *prompt*, grounded in the retrieved *context*.

    Args:
        prompt: The user's question.
        context: Retrieved chunk text to ground the answer in.

    Returns:
        The model's answer, or an error string when no API key is configured.
    """
    if not API_KEY:
        # The key is read from the 'GroqApi' env var at module load; the old
        # message named the wrong variable (GROQ_API_KEY), sending operators
        # to set an env var the app never reads.
        return "Error: 'GroqApi' is not set in environment variables."
    client = Groq(api_key=API_KEY)
    augmented_prompt = f"Based on the following data:\n\n{context}\n\nAnswer the question: {prompt}"
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": augmented_prompt,
            }
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
# Streamlit UI
st.title("RAG-Based Tariff Data Application")

# Streamlit re-executes this whole script on EVERY widget interaction, so
# plain module globals (CHUNKS/INDEX/MODEL) were reset to None before "Get
# Answer" could ever run — the query branch always reported "not processed".
# st.session_state persists across reruns and fixes that.
if "chunks" not in st.session_state:
    st.session_state.chunks = None
    st.session_state.index = None
    st.session_state.model = None

url = st.text_input("Enter Tariff Data URL", "https://iesco.com.pk/index.php/customer-services/tariff-guide")
if st.button("Process Tariff Data"):
    with st.spinner("Extracting and processing data..."):
        try:
            text = scrape_tariff_data(url)
            if not text:
                st.error("Failed to scrape data from the provided URL.")
                st.stop()
            chunks = chunk_text(text)
            if not chunks:
                st.error("No data available for processing.")
                st.stop()
            index, embeddings, model = create_faiss_index(chunks)
            if not index:
                st.error("Failed to create FAISS index.")
                st.stop()
            # Persist across reruns so the query section below can see them.
            st.session_state.chunks = chunks
            st.session_state.index = index
            st.session_state.model = model
            st.success("Data processed and indexed!")
            st.write("Number of chunks processed:", len(chunks))
        except Exception as e:
            st.error(f"Error processing data: {e}")

st.header("Query the Tariff Data")
prompt = st.text_input("Enter your query")
if st.button("Get Answer"):
    if prompt:
        with st.spinner("Fetching response..."):
            try:
                if st.session_state.index is None or not st.session_state.chunks or st.session_state.model is None:
                    st.error("Data has not been processed yet. Please process the data first.")
                else:
                    # Retrieve relevant chunks and ground the LLM answer in them.
                    relevant_chunks = search_faiss(
                        prompt,
                        st.session_state.index,
                        st.session_state.chunks,
                        st.session_state.model,
                    )
                    context = "\n".join(relevant_chunks)
                    response = query_llm(prompt, context)
                    st.write(response)
            except Exception as e:
                st.error(f"Error querying the model: {e}")
    else:
        st.warning("Please enter a query to continue.")