Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files

- .gitattributes +3 -0
- Dataset/Lenny Delligatti - SysML Distilled A Brief Guide to the Systems Modeling Language-Addison-Wesley Professional (2013).pdf +3 -0
- Dataset/OMG Systems Modeling Language (OMG SysML).pdf +3 -0
- Dataset/The_SysML_Modelling_Language.pdf +3 -0
- Dataset/sysmodeler_user_manual.pdf +0 -0
- vdb_script/faiss_vdb_script.py +50 -0
- vdb_script/requirements.txt +10 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
faiss_index_sysml/index.faiss filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
faiss_index_sysml/index.faiss filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
Dataset/Lenny[[:space:]]Delligatti[[:space:]]-[[:space:]]SysML[[:space:]]Distilled[[:space:]][[:space:]]A[[:space:]]Brief[[:space:]]Guide[[:space:]]to[[:space:]]the[[:space:]]Systems[[:space:]]Modeling[[:space:]]Language-Addison-Wesley[[:space:]]Professional[[:space:]](2013).pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
Dataset/OMG[[:space:]]Systems[[:space:]]Modeling[[:space:]]Language[[:space:]](OMG[[:space:]]SysML).pdf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
Dataset/The_SysML_Modelling_Language.pdf filter=lfs diff=lfs merge=lfs -text
|
Dataset/Lenny Delligatti - SysML Distilled A Brief Guide to the Systems Modeling Language-Addison-Wesley Professional (2013).pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:adad4affd9427f87a0ec9217abf30bab0cef7c2cc438023665e66c90d5ed6f9d
|
| 3 |
+
size 6350490
|
Dataset/OMG Systems Modeling Language (OMG SysML).pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:633007f2b5e8257f40b0315732d5c0720c1e025cd6bf78dfd9c0aa43eaff23fe
|
| 3 |
+
size 3613524
|
Dataset/The_SysML_Modelling_Language.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:406089eb1d4f67ca35cfa42cce377ad7435b30e5baa435cdd00847afdec635dd
|
| 3 |
+
size 233280
|
Dataset/sysmodeler_user_manual.pdf
ADDED
|
Binary file (36.4 kB). View file
|
|
|
vdb_script/faiss_vdb_script.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

# NOTE(review): langchain 0.2+ split the monolith apart — document loaders and
# vectorstores now live in langchain_community, the OpenAI bindings in
# langchain_openai, and text splitters in langchain_text_splitters. The old
# `langchain.document_loaders` / `langchain.embeddings.openai` /
# `langchain.llms` paths no longer exist, so under the langchain==0.3.25 pin
# in requirements.txt the previous imports raise ModuleNotFoundError.
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from dotenv import load_dotenv

# Load environment variable for OpenAI key (expects OPENAI_API_KEY in a .env file).
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("Missing OPENAI_API_KEY in environment variables.")
# Extract Data from the PDFs
def load_pdf_file(data_path):
    """Load every top-level ``*.pdf`` in *data_path* as LangChain documents.

    Args:
        data_path: Directory containing the PDF files (non-recursive glob).

    Returns:
        The list of documents produced by ``PyPDFLoader`` for each matched file.
    """
    pdf_loader = DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()
# Split the data into chunks
def text_split(docs):
    """Break *docs* into ~500-character chunks with a 20-character overlap.

    Args:
        docs: Documents to split (as returned by a LangChain loader).

    Returns:
        The list of chunked documents.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return chunker.split_documents(docs)
# Set up LLM and Embedding
# NOTE(review): `gpt-4o-mini` is a chat-completions model. The completion-style
# `OpenAI` wrapper used before targets the legacy /v1/completions endpoint,
# which does not serve chat models, so any call through `llm` would fail at
# runtime. Use the chat wrapper instead (langchain-openai is already pinned
# in requirements.txt); local import keeps this change self-contained.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.5, openai_api_key=OPENAI_API_KEY)
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Load PDF, chunk it, embed it, and store in FAISS
pdf_docs = load_pdf_file("/kaggle/input/rag-test")  # Update this to your PDF folder
chunks = text_split(pdf_docs)

vectorstore = FAISS.from_documents(chunks, embeddings)
vectorstore.save_local("faiss_index_sysml")

# Load FAISS and create retriever QA chain
# new_vectorstore = FAISS.load_local("faiss_index_sysml", embeddings, allow_dangerous_deserialization=True)
# qa = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",
#     retriever=new_vectorstore.as_retriever()
# )

# # Run a sample query
# query = "What is SysML used for?"
# print("User:", query)
# print("Bot:", qa.run(query))
vdb_script/requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain==0.3.25
|
| 2 |
+
langchain-community==0.3.24
|
| 3 |
+
langchain-core==0.3.60
|
| 4 |
+
langchain-openai==0.3.17
|
| 5 |
+
openai==1.79.0
|
| 6 |
+
faiss-cpu==1.11.0
|
| 7 |
+
python-dotenv==1.1.0
|
| 8 |
+
gradio==4.15.0
|
| 9 |
+
gradio_client==0.8.1
|
| 10 |
+
huggingface_hub >= 0.19.3
|