# portfolio-website / agent_startup.py
# Author: Ayush Rai
# Fix write paths for HF Spaces (commit ec103dc)
import os

from dotenv import load_dotenv

# Pull environment variables from a local .env file (no-op when absent).
load_dotenv()

# Optionally materialize the Google service-account JSON (held as a secret
# env var) to disk so Gemini auth can find it through
# GOOGLE_APPLICATION_CREDENTIALS. /tmp is used because it is the
# guaranteed-writable location on Hugging Face Spaces.
sa_json = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
if sa_json:
    sa_path = "/tmp/sa.json"  # changed from /data to /tmp (HF Spaces: /data may be read-only)
    # Write only once per container lifetime.
    if not os.path.exists(sa_path):
        with open(sa_path, "w", encoding="utf-8") as f:
            f.write(sa_json)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = sa_path
# --- embeddings + vectordb + llm (same as yours) ---
# Force Hugging Face to use a writable cache directory.
# NOTE: these env vars MUST be set BEFORE importing any HF-backed package —
# transformers / sentence-transformers read TRANSFORMERS_CACHE and HF_HOME at
# import time, so setting them after the langchain imports is too late.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.environ["HF_HOME"] = "/tmp/hf_home"

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
print("loading embedding")
# Small CPU-friendly sentence-transformer; model files are cached under /tmp
# so downloads succeed on Hugging Face Spaces' read-only app filesystem.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-MiniLM-L3-v2",
    model_kwargs={"device": "cpu"},
    cache_folder="/tmp/hf_cache",
)

# Chroma persistence location, overridable via the PERSIST_DIR env var.
persist_dir = os.getenv("PERSIST_DIR", "/tmp/chroma_db")
os.makedirs(persist_dir, exist_ok=True)

print("loading vectordb")
vectordb = Chroma(
    collection_name="general_embeddings",
    persist_directory=persist_dir,
    embedding_function=embedding_model,
)
# Seed the vector DB with the resume on first boot (i.e. when the
# collection holds no documents yet).
try:
    ids = vectordb.get().get("ids", [])
except Exception:
    # A fresh or unreadable collection can raise here; treat it as empty
    # so the seeding path below still runs.
    ids = []

if not ids:
    resume_path = os.getenv("RESUME_PATH", "media/resume/resume-ayush.pdf")
    if os.path.exists(resume_path):
        loader = PyMuPDFLoader(resume_path)
        docs = loader.load()
        # Chunk size/overlap chosen for short resume sections — TODO confirm
        # against retrieval quality if the source document changes.
        splitter = RecursiveCharacterTextSplitter(chunk_size=580, chunk_overlap=60)
        split_docs = splitter.split_documents(docs)
        vectordb.add_documents(split_docs)
print("loading llm")
# Gemini chat model; the model id is overridable via GEMINI_MODEL.
# A low temperature keeps answers close to the retrieved context.
llm = ChatGoogleGenerativeAI(
    model=os.getenv("GEMINI_MODEL", "gemini-2.0-flash"),
    temperature=0.2,
)