import os
from dotenv import load_dotenv

load_dotenv()

# Optionally write Google SA JSON (secret) to /tmp/sa.json for Gemini auth
sa_json = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
if sa_json:
    sa_path = "/tmp/sa.json"  # changed from /data to /tmp
    if not os.path.exists(sa_path):
        with open(sa_path, "w", encoding="utf-8") as f:
            f.write(sa_json)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = sa_path
# Force Hugging Face to use a writable cache directory
# (set before importing the libraries that read these variables)
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.environ["HF_HOME"] = "/tmp/hf_home"

# --- embeddings + vectordb + llm (same as yours) ---
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
| print("loading embedding") | |
| embedding_model = HuggingFaceEmbeddings( | |
| model_name="sentence-transformers/paraphrase-MiniLM-L3-v2", | |
| model_kwargs={'device': "cpu"}, | |
| cache_folder="/tmp/hf_cache" | |
| ) | |
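
# Optional sanity check (illustrative only): embed a short string to confirm
# the model downloaded into the writable cache and returns a dense vector.
_probe = embedding_model.embed_query("hello")
print(f"embedding dimension: {len(_probe)}")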
persist_dir = os.getenv("PERSIST_DIR", "/tmp/chroma_db")
os.makedirs(persist_dir, exist_ok=True)

print("loading vectordb")
vectordb = Chroma(
    persist_directory=persist_dir,
    embedding_function=embedding_model,
    collection_name="general_embeddings",
)
# Load resume into DB if empty
try:
    ids = vectordb.get().get("ids", [])
except Exception:
    ids = []

if len(ids) == 0:
    resume_path = os.getenv("RESUME_PATH", "media/resume/resume-ayush.pdf")
    if os.path.exists(resume_path):
        loader = PyMuPDFLoader(resume_path)
        docs = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=580, chunk_overlap=60)
        split_docs = splitter.split_documents(docs)
        vectordb.add_documents(split_docs)
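
# Optional check (illustrative; the query text is a placeholder): once chunks
# exist in the collection, confirm a similarity search returns results.
if vectordb.get().get("ids", []):
    preview = vectordb.similarity_search("work experience", k=2)
    print(f"retrieved {len(preview)} chunk(s) for a sample query")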
| print("loading llm") | |
| llm = ChatGoogleGenerativeAI(temperature=0.2, model=os.getenv("GEMINI_MODEL", "gemini-2.0-flash")) | |
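
# Minimal retrieval-augmented answer helper (a sketch; the prompt wording, k,
# and function name are illustrative and not part of the original app).
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

def answer(question: str) -> str:
    # Pull the most relevant resume chunks and pass them to Gemini as context.
    docs = retriever.invoke(question)
    context = "\n\n".join(d.page_content for d in docs)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\nQuestion: {question}"
    )
    return llm.invoke(prompt).content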