anshumanpatil committed on
Commit
25005d0
·
1 Parent(s): 664007d
Files changed (1) hide show
  1. app.py +27 -20
app.py CHANGED
@@ -14,6 +14,12 @@ load_dotenv()
14
 
15
  model_name = os.getenv("MODEL_NAME")
16
  embedding_model_name = os.getenv("EMBEDDING_MODEL_NAME")
 
 
 
 
 
 
17
  # ------------------------------
18
  # Title
19
  # ------------------------------
@@ -29,17 +35,11 @@ def load_model():
29
  model = AutoModelForCausalLM.from_pretrained(model_name)
30
  return pipeline("text-generation", model=model, tokenizer=tokenizer)
31
 
32
- with st.spinner("🔄 Loading Model..."):
33
- generator = load_model()
34
-
35
- # ------------------------------
36
- # Extract Text
37
- # ------------------------------
38
- uploaded_file = "./msci"
39
-
40
- def extract_text(folder_path):
41
  loader = DirectoryLoader(
42
- path=folder_path,
43
  glob="**/*.txt",
44
  loader_cls=TextLoader,
45
  recursive=True
@@ -47,6 +47,18 @@ def extract_text(folder_path):
47
  documents = loader.load()
48
  return "\n".join([doc.page_content for doc in documents])
49
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # ------------------------------
51
  # Build FAISS Index
52
  # ------------------------------
@@ -55,19 +67,14 @@ def build_faiss(_docs):
55
  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
56
  return FAISS.from_documents(_docs, embeddings)
57
 
58
- docs = []
59
- db = None
 
 
 
60
 
61
  query = st.text_input("💬 Ask a question about MSCI Indexes", placeholder="MSCI World IMI Index")
62
 
63
- if uploaded_file:
64
- text = extract_text(uploaded_file)
65
- if text:
66
- splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
67
- docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
68
- db = build_faiss(docs)
69
- st.success("✅ Knowledge Base ready! From :- https://www.msci.com/indexes#featured-indexes")
70
-
71
  if query and db:
72
  retriever = db.as_retriever(search_kwargs={"k": 3})
73
  retrieved_docs = retriever.get_relevant_documents(query)
 
14
 
15
  model_name = os.getenv("MODEL_NAME")
16
  embedding_model_name = os.getenv("EMBEDDING_MODEL_NAME")
17
+
18
+
19
+ docs = []
20
+ db = None
21
+ extracted_text = None
22
+
23
  # ------------------------------
24
  # Title
25
  # ------------------------------
 
35
  model = AutoModelForCausalLM.from_pretrained(model_name)
36
  return pipeline("text-generation", model=model, tokenizer=tokenizer)
37
 
38
+ @st.cache_resource
39
+ def extract_text():
40
+ uploaded_data_path = "./msci"
 
 
 
 
 
 
41
  loader = DirectoryLoader(
42
+ path=uploaded_data_path,
43
  glob="**/*.txt",
44
  loader_cls=TextLoader,
45
  recursive=True
 
47
  documents = loader.load()
48
  return "\n".join([doc.page_content for doc in documents])
49
 
50
+
51
+ with st.spinner("🔄 Loading Model..."):
52
+ generator = load_model()
53
+ with st.spinner("🔄 Loading Knowldge Base..."):
54
+ extracted_text = extract_text()
55
+
56
+ # ------------------------------
57
+ # Extract Text
58
+ # ------------------------------
59
+
60
+
61
+
62
  # ------------------------------
63
  # Build FAISS Index
64
  # ------------------------------
 
67
  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
68
  return FAISS.from_documents(_docs, embeddings)
69
 
70
+ if extracted_text:
71
+ splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
72
+ docs = [Document(page_content=chunk) for chunk in splitter.split_text(extracted_text)]
73
+ db = build_faiss(docs)
74
+ st.success("✅ Knowledge Base ready! From :- https://www.msci.com/indexes#featured-indexes")
75
 
76
  query = st.text_input("💬 Ask a question about MSCI Indexes", placeholder="MSCI World IMI Index")
77
 
 
 
 
 
 
 
 
 
78
  if query and db:
79
  retriever = db.as_retriever(search_kwargs={"k": 3})
80
  retrieved_docs = retriever.get_relevant_documents(query)