anshumanpatil committed on
Commit
ad4f7fb
·
1 Parent(s): 77ddb31

add other parameters

Browse files
Files changed (1) hide show
  1. app.py +14 -14
app.py CHANGED
@@ -8,14 +8,15 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  from langchain.schema import Document
10
  from sentence_transformers import SentenceTransformer
 
11
 
12
  # ------------------------------
13
  # Title
14
  # ------------------------------
15
- st.title("📚 RAG Chatbot with TinyLlama")
16
 
17
  # ------------------------------
18
- # Load TinyLlama
19
  # ------------------------------
20
  @st.cache_resource
21
  def load_model():
@@ -24,27 +25,21 @@ def load_model():
24
  model = AutoModelForCausalLM.from_pretrained(model_name)
25
  return pipeline("text-generation", model=model, tokenizer=tokenizer)
26
 
27
- with st.spinner("🔄 Loading TinyLlama..."):
28
  generator = load_model()
29
 
30
  # ------------------------------
31
  # File Upload
32
  # ------------------------------
33
- uploaded_file = st.file_uploader("📂 Upload a file (PDF, DOCX, CSV)", type=["pdf", "docx", "csv"])
34
 
35
  # ------------------------------
36
  # Extract Text
37
  # ------------------------------
38
  def extract_text(file):
39
- if file.type == "application/pdf":
40
- pdf_reader = pypdf.PdfReader(file)
41
- return "\n".join([page.extract_text() for page in pdf_reader.pages if page.extract_text()])
42
- elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
43
- return docx2txt.process(file)
44
- elif file.type == "text/csv":
45
- df = pd.read_csv(file)
46
- return df.to_string(index=False)
47
- return ""
48
 
49
  # ------------------------------
50
  # Build FAISS Index
@@ -56,6 +51,9 @@ def build_faiss(_docs):
56
 
57
  docs = []
58
  db = None
 
 
 
59
  if uploaded_file:
60
  text = extract_text(uploaded_file)
61
  if text:
@@ -63,11 +61,13 @@ if uploaded_file:
63
  docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
64
  db = build_faiss(docs)
65
  st.success("✅ Knowledge Base ready!")
 
 
66
 
67
  # ------------------------------
68
  # Chat
69
  # ------------------------------
70
- query = st.text_input("💬 Ask a question about the uploaded document:")
71
 
72
  if query and db:
73
  retriever = db.as_retriever(search_kwargs={"k": 3})
 
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  from langchain.schema import Document
10
  from sentence_transformers import SentenceTransformer
11
+ from langchain_community.document_loaders import DirectoryLoader, TextLoader
12
 
13
  # ------------------------------
14
  # Title
15
  # ------------------------------
16
+ st.title("📚 RAG For MSCI Indexes")
17
 
18
  # ------------------------------
19
+ # Load Model for pretraining
20
  # ------------------------------
21
  @st.cache_resource
22
  def load_model():
 
25
  model = AutoModelForCausalLM.from_pretrained(model_name)
26
  return pipeline("text-generation", model=model, tokenizer=tokenizer)
27
 
28
+ with st.spinner("🔄 Loading Model..."):
29
  generator = load_model()
30
 
31
  # ------------------------------
32
  # File Upload
33
  # ------------------------------
34
+ uploaded_file = "msci.txt"
35
 
36
  # ------------------------------
37
  # Extract Text
38
  # ------------------------------
39
  def extract_text(file):
40
+ loader = TextLoader(file, encoding = "utf-8")
41
+ return loader.load()[0].page_content
42
+ # return "\n".join([page.extract_text() for page in pdf_reader.pages if page.extract_text()])
 
 
 
 
 
 
43
 
44
  # ------------------------------
45
  # Build FAISS Index
 
51
 
52
  docs = []
53
  db = None
54
+
55
+ query = st.text_input("💬 Ask a question about MSCI Indexes:")
56
+
57
  if uploaded_file:
58
  text = extract_text(uploaded_file)
59
  if text:
 
61
  docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
62
  db = build_faiss(docs)
63
  st.success("✅ Knowledge Base ready!")
64
+ st.info("You can ask any question regarding data feed to model is as below!")
65
+ long_text = st.text_area(text, height=150, disabled=True)
66
 
67
  # ------------------------------
68
  # Chat
69
  # ------------------------------
70
+
71
 
72
  if query and db:
73
  retriever = db.as_retriever(search_kwargs={"k": 3})