# -- HuggingFace Spaces page residue (status: "Sleeping") removed; kept as a comment so the file stays valid Python. --
| import os | |
| from dotenv import load_dotenv | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| import serpapi | |
# Load environment variables (API keys) from a local .env file.
load_dotenv()

# Redirect all HuggingFace caches to /tmp — presumably because the host
# filesystem (e.g. a HF Space container) is read-only outside /tmp.
# TODO(review): confirm; TRANSFORMERS_CACHE is deprecated in favor of HF_HOME
# in newer transformers releases.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
os.environ["HF_DATASETS_CACHE"] = "/tmp/huggingface/datasets"

# LLM (Groq + LLaMA3): Groq exposes an OpenAI-compatible endpoint, so the
# OpenAI chat wrapper is pointed at Groq's base URL with a Groq API key.
# NOTE(review): os.environ[...] raises KeyError at import time if
# GROQ_API_KEY is unset — intentional fail-fast, verify that is desired.
llm = ChatOpenAI(
    model="llama3-8b-8192",
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=os.environ["GROQ_API_KEY"]
)

# Embeddings (HuggingFace): sentence-transformers MiniLM model, downloaded
# into the /tmp cache configured above (env vars must be set first).
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Load PDFs and create FAISS vectorstore
def load_vectorstore(pdf_dir="pdfs/"):
    """Build a FAISS vector store from every PDF in *pdf_dir*.

    Each PDF is loaded page-by-page, split into overlapping character
    chunks, and embedded with the module-level ``embeddings`` model.

    Args:
        pdf_dir: Directory to scan for ``.pdf`` files (non-recursive).

    Returns:
        A FAISS vector store built from the chunked documents.

    Raises:
        FileNotFoundError: If *pdf_dir* does not exist.
        ValueError: If no PDF documents were loaded — FAISS.from_documents
            would otherwise fail with an opaque error on an empty list.
    """
    if not os.path.isdir(pdf_dir):
        raise FileNotFoundError(f"PDF directory not found: {pdf_dir!r}")
    docs = []
    # sorted() makes chunk ordering deterministic across runs;
    # .lower() also accepts uppercase ".PDF" extensions.
    for file in sorted(os.listdir(pdf_dir)):
        if file.lower().endswith(".pdf"):
            loader = PyPDFLoader(os.path.join(pdf_dir, file))
            docs.extend(loader.load())
    if not docs:
        raise ValueError(f"No PDF documents found in {pdf_dir!r}")
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)
    return FAISS.from_documents(chunks, embedding=embeddings)
# Custom Web Search tool using SerpAPI
def search_tool(query: str):
    """Run a Google search via SerpAPI and return the top result's snippet.

    Args:
        query: The search query string.

    Returns:
        The snippet of the first organic result; falls back to the result's
        title, or a "no results" message when the search returns nothing.
        (The original indexed ["organic_results"][0]["snippet"] directly,
        which raised KeyError/IndexError on empty or snippet-less results.)
    """
    client = serpapi.Client(api_key=os.getenv("SERPAPI_API_KEY"))
    search = client.search({
        "engine": "google",
        "q": query,
    })
    results = dict(search)
    organic = results.get("organic_results") or []
    if not organic:
        return "No search results found."
    # Some result types (knowledge panels, ads) carry no snippet — fall
    # back to the title rather than crashing the agent.
    return organic[0].get("snippet") or organic[0].get("title", "No snippet available.")