Spaces:

Rahaf2001
/

RAG-Project

Sleeping

App Files Community

Rahaf2001 committed on Oct 22, 2025

Commit

f25282e

verified ·

1 Parent(s): 5aa3790

Upload rag_core.py

Browse files

Files changed (1) hide show

rag_core.py +70 -0

rag_core.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import requests
+from bs4 import BeautifulSoup
+from langchain_community.document_loaders import WebBaseLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.chains import create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+import os
+# --- Module-level RAG state, shared by the functions below through `global` ---
+vector_store = None  # FAISS index; assigned by scrape_and_process_url()
+llm = None  # chat model; assigned by initialize_rag_components()
+retrieval_chain = None  # stays None until scrape_and_process_url() has built a chain
+def initialize_rag_components():
+    global llm
+    llm = ChatOpenAI(model="gemini-2.5-flash", temperature=0.3)
+def scrape_and_process_url(url: str) -> str:
+    global vector_store, retrieval_chain
+    try:
+        # Scrape content using WebBaseLoader for simplicity and robustness
+        # This handles parsing and extracting main content from various web pages
+        loader = WebBaseLoader(url)
+        docs = loader.load()
+        if not docs:
+            return "Failed to load content from the URL. Please check the URL or try another one."
+        # Split documents into smaller chunks
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        chunks = text_splitter.split_documents(docs)
+        # Create embeddings and vector store
+        # Ensure OPENAI_API_KEY is set as an environment variable in Hugging Face Spaces
+        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+        vector_store = FAISS.from_documents(chunks, embeddings)
+        # Create RAG chain
+        prompt = ChatPromptTemplate.from_messages([
+            ("system", "Answer the user's questions based on the provided context only. "
+             "If you don't know the answer, just say that you don't know, don't make up an answer.\n\n{context}"),
+            ("user", "{input}")
+        ])
+        document_chain = create_stuff_documents_chain(llm, prompt)
+        retrieval_chain = create_retrieval_chain(vector_store.as_retriever(), document_chain)
+        return f"Successfully scraped and processed content from {url}. You can now ask questions."
+    except Exception as e:
+        return f"An error occurred during scraping or processing: {str(e)}"
+def answer_question(question: str) -> str:
+    global retrieval_chain
+    if retrieval_chain is None:
+        return "Please scrape and process a URL first before asking questions."
+    try:
+        response = retrieval_chain.invoke({"input": question})
+        return response["answer"]
+    except Exception as e:
+        return f"An error occurred while answering the question: {str(e)}"
+# Build the shared LLM once at import time so scrape_and_process_url() can use it right away
+initialize_rag_components()