Spaces:

amkj84
/

tour

Build error

App Files Files Community

tour / app.py

amkj84

Update app.py

e70fe90 verified over 1 year ago

raw

history blame contribute delete

5.68 kB

	import streamlit as st
	from groq import Groq
	import os
	import fitz # PyMuPDF
	from fpdf import FPDF
	from sentence_transformers import SentenceTransformer
	import faiss
	import numpy as np

	# Initialize Groq client.
	# The API key is read from the GROQ_API_KEY environment variable (e.g. a
	# Hugging Face Space secret) instead of being committed to source control.
	# NOTE(review): the key that was previously hard-coded here has been
	# published and should be revoked and rotated.
	client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

	# Initialize Sentence Transformer for embeddings
	model = SentenceTransformer('all-MiniLM-L6-v2')

	# Function to extract text from PDFs
	def extract_text_from_pdfs(data_folder):
	    """Return the concatenated text of every PDF found directly in a folder.

	    Args:
	        data_folder: Path of the directory to scan (not recursive).

	    Returns:
	        A single string holding the text of all pages of all PDFs, or an
	        empty string when the folder contains no PDF files.
	    """
	    page_texts = []
	    # Sort the listing so the resulting text (and therefore the chunking)
	    # is deterministic; os.listdir() returns entries in arbitrary order.
	    for filename in sorted(os.listdir(data_folder)):
	        # Accept ".PDF" as well as ".pdf".
	        if not filename.lower().endswith(".pdf"):
	            continue
	        file_path = os.path.join(data_folder, filename)
	        # The context manager closes the document even if a page fails.
	        with fitz.open(file_path) as doc:
	            page_texts.extend(page.get_text() for page in doc)
	    # Join once instead of growing a string with += for every page.
	    return "".join(page_texts)

	# Function to split text into chunks of sentences
	def chunk_text(text, chunk_size=500):
	    """Split text into chunks of at most ``chunk_size`` sentences.

	    Sentences are delimited by ".". Each sentence is stripped of surrounding
	    whitespace, empty fragments are dropped, and the sentences of a chunk
	    are re-joined with ". " (plus a closing ".") so sentence boundaries
	    remain visible in the chunk text.

	    Args:
	        text: The text to split.
	        chunk_size: Maximum number of sentences per chunk.

	    Returns:
	        A list of chunk strings; an empty list when ``text`` contains no
	        sentences.

	    Raises:
	        ValueError: If ``chunk_size`` is smaller than 1.
	    """
	    if chunk_size < 1:
	        raise ValueError("chunk_size must be a positive integer")

	    # Drop the empty fragments produced by blank input, a trailing "." or
	    # consecutive periods, so no empty chunk is ever emitted.
	    sentences = [part.strip() for part in text.split(".")]
	    sentences = [sentence for sentence in sentences if sentence]

	    return [
	        ". ".join(sentences[start:start + chunk_size]) + "."
	        for start in range(0, len(sentences), chunk_size)
	    ]

	# Function to create embeddings for the chunks
	def create_embeddings(chunks):
	    """Encode the chunks with the module-level sentence-transformer model.

	    Args:
	        chunks: The chunk strings to embed.

	    Returns:
	        Whatever ``model.encode`` returns when called with
	        ``convert_to_tensor=True``.
	    """
	    return model.encode(chunks, convert_to_tensor=True)

	# Functions to create a FAISS index and to query it for the most relevant chunks
	def create_faiss_index(embeddings, chunks):
	    """Build a flat L2 FAISS index over the chunk embeddings.

	    Args:
	        embeddings: 2-D collection of embedding vectors, one row per chunk.
	            Either a NumPy array or a tensor exposing ``detach``/``cpu``/
	            ``numpy`` is accepted.
	        chunks: The chunk strings the embeddings belong to. Unused here;
	            kept so the signature stays compatible with existing callers.

	    Returns:
	        A ``faiss.IndexFlatL2`` containing all embedding rows.

	    Raises:
	        ValueError: If ``embeddings`` is not a non-empty 2-D matrix.
	    """
	    # FAISS works on contiguous float32 NumPy arrays. A tensor (which may
	    # live on a GPU) is moved to host memory and converted first.
	    if hasattr(embeddings, "detach"):
	        embeddings = embeddings.detach().cpu().numpy()
	    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)

	    if vectors.ndim != 2 or vectors.shape[0] == 0:
	        raise ValueError("embeddings must be a non-empty 2-D matrix")

	    index = faiss.IndexFlatL2(vectors.shape[1])
	    index.add(vectors)
	    return index

	def search_faiss_index(query, index, chunks, k=3):
	    """Return the chunks whose embeddings are closest to the query.

	    Args:
	        query: The query text to embed and search for.
	        index: The FAISS index built over the chunk embeddings.
	        chunks: The chunk strings, in the order they were added to the index.
	        k: Maximum number of chunks to return.

	    Returns:
	        A list of up to ``k`` chunk strings, in the order FAISS returns them
	        (nearest first); an empty list when there are no chunks or
	        ``k`` < 1.
	    """
	    # Never ask for more neighbours than there are chunks.
	    k = min(k, len(chunks))
	    if k < 1:
	        return []

	    # FAISS expects a contiguous float32 NumPy matrix, so request NumPy
	    # output rather than a tensor.
	    query_embedding = model.encode([query], convert_to_numpy=True)
	    query_vector = np.ascontiguousarray(query_embedding, dtype=np.float32)

	    _distances, indices = index.search(query_vector, k)

	    # FAISS pads missing results with -1; indexing chunks with -1 would
	    # silently return the last chunk, so discard out-of-range ids.
	    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

	# Function to generate itinerary using RAG approach
	def generate_itinerary_with_rag(duration, location, preference, index, chunks):
	    """Generate a travel itinerary with retrieval-augmented generation.

	    The trip description is first used as a search query against the FAISS
	    index; the retrieved chunks are then embedded as context in the prompt
	    sent to the Groq chat-completions API.

	    Args:
	        duration: Trip length in days, interpolated into the prompt.
	        location: Starting location.
	        preference: Preferred regions to visit.
	        index: FAISS index passed through to ``search_faiss_index``.
	        chunks: Chunk strings passed through to ``search_faiss_index``.

	    Returns:
	        The message content of the first completion choice.
	    """
	    # Step 1: retrieve the passages relevant to the requested trip.
	    query = f"Plan a {duration}-day trip starting from {location}. The preference is to visit {preference}."
	    context = "\n".join(search_faiss_index(query, index, chunks))

	    # Step 2: build the prompt around the retrieved context.
	    prompt = f"""
	You are a travel expert. Based on the following information, generate a detailed {duration}-day travel itinerary for Pakistan.
	Duration: {duration} days
	Starting Location: {location}
	Preferred Regions: {preference}

	The context for your plan:
	{context}

	The itinerary should include:
	1. A day-wise schedule with suggested places to visit.
	2. Estimated travel times between locations.
	3. Recommended transportation options.
	4. Accommodation suggestions.
	5. Dining options.
	6. Local experiences.
	7. Travel tips (weather, cultural considerations).
	"""

	    # Step 3: ask the Groq model and hand back its answer.
	    user_message = {"role": "user", "content": prompt}
	    response = client.chat.completions.create(
	        messages=[user_message],
	        model="llama3-8b-8192"
	    )
	    first_choice = response.choices[0]
	    return first_choice.message.content

	# Function to generate PDF
	def generate_pdf(itinerary, filename="itinerary.pdf"):
	    """Write the itinerary text to a single-column PDF file.

	    Args:
	        itinerary: The itinerary text; ``None`` is treated as empty text.
	        filename: Path of the PDF file to create.

	    Returns:
	        The ``filename`` that was written.
	    """
	    # The built-in "Arial" core font only covers Latin-1. Model output
	    # routinely contains characters outside it (curly quotes, dashes,
	    # emoji), which would abort PDF generation, so replace anything that
	    # cannot be encoded with "?".
	    printable = str(itinerary or "").encode("latin-1", "replace").decode("latin-1")

	    pdf = FPDF()
	    pdf.set_auto_page_break(auto=True, margin=15)
	    pdf.add_page()

	    # Set font
	    pdf.set_font("Arial", size=12)

	    # Add a title
	    pdf.cell(200, 10, txt="Generated Travel Itinerary", ln=True, align="C")

	    # Add the itinerary text; multi_cell wraps long lines automatically
	    pdf.multi_cell(0, 10, txt=printable)

	    # Save the PDF under the requested file name
	    pdf.output(filename)
	    return filename

	# Streamlit App

	# Title of the app
	st.title("Pakistan Travel Itinerary Generator")
	st.markdown("This app generates a personalized travel itinerary for your trip to Pakistan.")

	# Google Drive folder where the PDFs are stored
	data_folder = '/content/drive/MyDrive/PAKTOURISM'

	# Extract and process text from PDFs
	if not os.path.exists(data_folder):
	    st.error(f"Google Drive folder not found: {data_folder}")
	    # Without the source PDFs there is no index to query. Stop here so the
	    # rest of the script cannot fail with a NameError on faiss_index/chunks.
	    st.stop()

	# NOTE(review): Streamlit re-runs this script on every interaction, so the
	# extraction and embedding below are repeated each time; consider caching.
	st.info("Extracting and processing text from PDF files...")
	extracted_text = extract_text_from_pdfs(data_folder)
	chunks = chunk_text(extracted_text)
	if not any(chunk.strip() for chunk in chunks):
	    st.error(f"No readable text found in the PDF files under: {data_folder}")
	    st.stop()
	embeddings = create_embeddings(chunks)
	faiss_index = create_faiss_index(embeddings, chunks)

	# Input fields for user.
	# The examples are placeholders, not default values: as defaults they would
	# be submitted verbatim and defeat the empty-field check below.
	duration = st.text_input("Duration (days)", placeholder="e.g., 5")
	location = st.text_input("Location (Current city)", placeholder="e.g., Lahore")
	preference = st.text_input("Preference (Punjab, KPK, Gilgit, or combination)", placeholder="e.g., Punjab, KPK")

	# Button to generate itinerary
	if st.button("Generate Itinerary"):
	    duration = duration.strip()
	    location = location.strip()
	    preference = preference.strip()

	    if not duration or not location or not preference:
	        st.error("Please fill in all fields.")
	    elif not duration.isdecimal() or int(duration) < 1:
	        st.error("Duration must be a positive whole number of days.")
	    else:
	        itinerary = generate_itinerary_with_rag(duration, location, preference, faiss_index, chunks)

	        # Display the generated itinerary
	        st.subheader("Generated Itinerary:")
	        st.text(itinerary)

	        # Provide option to download the itinerary as a text file.
	        # The text is handed over as UTF-8 bytes directly, so no temporary
	        # file (and no open file handle) is needed.
	        file_name_txt = "travel_itinerary.txt"
	        st.download_button(
	            label="Download Itinerary (Text)",
	            data=itinerary.encode("utf-8"),
	            file_name=file_name_txt,
	            mime="text/plain",
	        )

	        # Provide option to download the itinerary as a PDF
	        file_name_pdf = "travel_itinerary.pdf"
	        generate_pdf(itinerary, file_name_pdf)
	        # Read the bytes inside a context manager so the file is closed.
	        with open(file_name_pdf, "rb") as pdf_file:
	            pdf_bytes = pdf_file.read()

	        st.download_button(
	            label="Download Itinerary (PDF)",
	            data=pdf_bytes,
	            file_name=file_name_pdf,
	            mime="application/pdf",
	        )