# tour / app.py
# amkj84's picture
# Update app.py
# e70fe90 verified
import streamlit as st
from groq import Groq
import os
import fitz # PyMuPDF
from fpdf import FPDF
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
# Initialize the Groq client.
# The API key is read from the GROQ_API_KEY environment variable so that the
# secret never lives in source control. SECURITY: a key was previously
# hard-coded on this line and has therefore been published; it must be revoked
# and replaced with a fresh one supplied through the environment.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Initialize Sentence Transformer for embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
# Function to extract text from PDFs
def extract_text_from_pdfs(data_folder):
    """Return the concatenated text of every PDF directly inside a folder.

    Args:
        data_folder: Path of the directory to scan. Only entries whose name
            ends with ".pdf" are read; sub-directories are not descended into.

    Returns:
        The text of all pages of all matching files joined into one string,
        or an empty string when the folder contains no PDF files.
    """
    page_texts = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # extracted text (and therefore the chunks built from it) reproducible.
    for filename in sorted(os.listdir(data_folder)):
        if not filename.endswith(".pdf"):
            continue
        file_path = os.path.join(data_folder, filename)
        # The context manager closes the document even if a page fails to
        # parse, so file handles are not leaked across PDFs.
        with fitz.open(file_path) as doc:
            page_texts.extend(page.get_text() for page in doc)
    # Join once at the end instead of growing a string with repeated `+=`.
    return "".join(page_texts)
# Function to chunk and tokenize text
def chunk_text(text, chunk_size=500):
    """Group the "."-separated pieces of *text* into fixed-size chunks.

    The text is split on every "." character (the periods themselves are
    discarded), and each run of up to *chunk_size* consecutive pieces is
    joined back together with single spaces.

    Args:
        text: The text to split.
        chunk_size: Maximum number of pieces per chunk.

    Returns:
        A list of chunk strings; an empty *text* yields ``[""]``.
    """
    pieces = text.split(".")
    starts = range(0, len(pieces), chunk_size)
    return [" ".join(pieces[start:start + chunk_size]) for start in starts]
# Function to create embeddings for the chunks
def create_embeddings(chunks):
    """Encode *chunks* with the module-level sentence-transformer ``model``.

    Args:
        chunks: The text chunks to embed.

    Returns:
        Whatever ``model.encode`` produces when called with
        ``convert_to_tensor=True``.
    """
    return model.encode(chunks, convert_to_tensor=True)
# Function to create FAISS index and query the text
def create_faiss_index(embeddings, chunks):
    """Build a flat L2 FAISS index over *embeddings*.

    Args:
        embeddings: A 2-D collection of vectors, one row per chunk. Either a
            NumPy-compatible array or a tensor-like object exposing
            ``detach()``/``cpu()``/``numpy()`` is accepted.
        chunks: The chunks the rows correspond to. Unused here; kept so the
            signature stays compatible with existing callers.

    Returns:
        A ``faiss.IndexFlatL2`` containing every row of *embeddings*.

    Raises:
        ValueError: If *embeddings* is not two-dimensional.
    """
    # create_embeddings() asks the encoder for a tensor, which may live on a
    # GPU. Move tensor-like inputs to host memory before handing them over.
    if hasattr(embeddings, "detach"):
        embeddings = embeddings.detach().cpu().numpy()
    # FAISS operates on contiguous float32 matrices.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            f"embeddings must be a 2-D array of vectors, got shape {vectors.shape}"
        )
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
def search_faiss_index(query, index, chunks, k=3):
    """Return the chunks whose embeddings are closest to *query*.

    Args:
        query: The natural-language query to embed and look up.
        index: The FAISS index built over the embeddings of *chunks*.
        chunks: The text chunks, in the same order they were indexed.
        k: Maximum number of chunks to return.

    Returns:
        Up to *k* chunks, nearest first. Fewer are returned when fewer chunks
        exist; an empty list is returned when there are no chunks or *k* is
        not positive.
    """
    if not chunks or k <= 0:
        return []
    query_embedding = model.encode([query], convert_to_tensor=True)
    # The encoder returns a tensor (possibly on a GPU); FAISS needs a
    # contiguous float32 array in host memory.
    if hasattr(query_embedding, "detach"):
        query_embedding = query_embedding.detach().cpu().numpy()
    query_vector = np.ascontiguousarray(query_embedding, dtype=np.float32)
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, and chunks[-1] would silently return the last chunk.
    k = min(k, len(chunks))
    _distances, indices = index.search(query_vector, k)
    # Keep the bounds check as a safety net in case the index holds fewer
    # vectors than `chunks` has entries.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
# Function to generate itinerary using RAG approach
def generate_itinerary_with_rag(duration, location, preference, index, chunks):
    """Generate a travel itinerary using retrieved PDF text as context.

    The trip parameters are turned into a retrieval query, the matching
    chunks are looked up through ``search_faiss_index``, and the result is
    embedded into a prompt sent to the Groq chat-completions API.

    Args:
        duration: Trip length in days, as entered by the user.
        location: Starting city.
        preference: Preferred region(s) to visit.
        index: FAISS index over the embeddings of *chunks*.
        chunks: The text chunks the index was built from.

    Returns:
        The content of the first choice in the chat completion response.
    """
    # Step 1: retrieve the chunks most relevant to this trip.
    retrieval_query = f"Plan a {duration}-day trip starting from {location}. The preference is to visit {preference}."
    context = "\n".join(search_faiss_index(retrieval_query, index, chunks))

    # Step 2: build the prompt. The lines of the literal are flush-left
    # because every character inside the triple quotes is part of the prompt.
    prompt = f"""
You are a travel expert. Based on the following information, generate a detailed {duration}-day travel itinerary for Pakistan.
Duration: {duration} days
Starting Location: {location}
Preferred Regions: {preference}
The context for your plan:
{context}
The itinerary should include:
1. A day-wise schedule with suggested places to visit.
2. Estimated travel times between locations.
3. Recommended transportation options.
4. Accommodation suggestions.
5. Dining options.
6. Local experiences.
7. Travel tips (weather, cultural considerations).
"""

    # Step 3: ask the model and hand back its answer.
    user_message = {"role": "user", "content": prompt}
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[user_message],
    )
    return response.choices[0].message.content
# Function to generate PDF
def generate_pdf(itinerary, filename="itinerary.pdf"):
    """Write *itinerary* to a simple one-column PDF file.

    Args:
        itinerary: The itinerary text to render.
        filename: Path of the PDF file to create.

    Returns:
        The *filename* that was written.
    """
    # The built-in "Arial" core font only covers Latin-1. Model output often
    # contains characters outside that range (curly quotes, dashes, emoji),
    # which would make PDF generation raise; replace them with "?" instead.
    printable_text = itinerary.encode("latin-1", errors="replace").decode("latin-1")

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    # Set font
    pdf.set_font("Arial", size=12)
    # Add a title
    pdf.cell(200, 10, txt="Generated Travel Itinerary", ln=True, align="C")
    # Add the itinerary text
    pdf.multi_cell(0, 10, txt=printable_text)
    # Save the PDF to the requested file
    pdf.output(filename)
    return filename
# Streamlit App
# Title of the app
st.title("Pakistan Travel Itinerary Generator")
st.markdown("This app generates a personalized travel itinerary for your trip to Pakistan.")
# Google Drive folder where the PDFs are stored
data_folder = '/content/drive/MyDrive/PAKTOURISM'
# Extract and process text from PDFs
if not os.path.exists(data_folder):
    st.error(f"Google Drive folder not found: {data_folder}")
    # Without the folder there is no index to query. Halt this script run
    # here instead of failing later with a NameError on `faiss_index`.
    st.stop()
st.info("Extracting and processing text from PDF files...")
extracted_text = extract_text_from_pdfs(data_folder)
chunks = chunk_text(extracted_text)
embeddings = create_embeddings(chunks)
faiss_index = create_faiss_index(embeddings, chunks)
# Input fields for user. The examples are shown as placeholders: passing them
# as the widget's value would send the literal "e.g., 5" to the model and make
# the empty-field check below impossible to trigger.
duration = st.text_input("Duration (days)", placeholder="e.g., 5").strip()
location = st.text_input("Location (Current city)", placeholder="e.g., Lahore").strip()
preference = st.text_input("Preference (Punjab, KPK, Gilgit, or combination)", placeholder="e.g., Punjab, KPK").strip()
# Button to generate itinerary
if st.button("Generate Itinerary"):
    if not duration or not location or not preference:
        st.error("Please fill in all fields.")
    elif not duration.isdecimal() or int(duration) < 1:
        st.error("Duration must be a positive whole number of days.")
    else:
        itinerary = generate_itinerary_with_rag(duration, location, preference, faiss_index, chunks)
        # Display the generated itinerary
        st.subheader("Generated Itinerary:")
        st.text(itinerary)
        # Provide option to download the itinerary as a text file. The text is
        # handed to the button directly: no intermediate file, no leaked file
        # handle, and no dependence on the platform's default encoding.
        file_name_txt = "travel_itinerary.txt"
        st.download_button(label="Download Itinerary (Text)", data=itinerary, file_name=file_name_txt)
        # Provide option to download the itinerary as a PDF
        file_name_pdf = "travel_itinerary.pdf"
        generate_pdf(itinerary, file_name_pdf)
        # Read the bytes inside a context manager so the handle is closed.
        with open(file_name_pdf, "rb") as pdf_file:
            pdf_bytes = pdf_file.read()
        st.download_button(label="Download Itinerary (PDF)", data=pdf_bytes, file_name=file_name_pdf)