Spaces:

anshumanpatil
/

ramayan_rag

Sleeping

App Files Files Community

anshumanpatil committed on Aug 23, 2025

Commit

ad4f7fb

1 Parent(s): 77ddb31

add other parameters

Browse files

Files changed (1) hide show

app.py +14 -14

app.py CHANGED Viewed

@@ -8,14 +8,15 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.schema import Document
 from sentence_transformers import SentenceTransformer
 # ------------------------------
 # Title
 # ------------------------------
-st.title("📚 RAG Chatbot with TinyLlama")
 # ------------------------------
-# Load TinyLlama
 # ------------------------------
 @st.cache_resource
 def load_model():
@@ -24,27 +25,21 @@ def load_model():
     model = AutoModelForCausalLM.from_pretrained(model_name)
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
-with st.spinner("🔄 Loading TinyLlama..."):
     generator = load_model()
 # ------------------------------
 # File Upload
 # ------------------------------
-uploaded_file = st.file_uploader("📂 Upload a file (PDF, DOCX, CSV)", type=["pdf", "docx", "csv"])
 # ------------------------------
 # Extract Text
 # ------------------------------
 def extract_text(file):
-    if file.type == "application/pdf":
-        pdf_reader = pypdf.PdfReader(file)
-        return "\n".join([page.extract_text() for page in pdf_reader.pages if page.extract_text()])
-    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        return docx2txt.process(file)
-    elif file.type == "text/csv":
-        df = pd.read_csv(file)
-        return df.to_string(index=False)
-    return ""
 # ------------------------------
 # Build FAISS Index
@@ -56,6 +51,9 @@ def build_faiss(_docs):
 docs = []
 db = None
 if uploaded_file:
     text = extract_text(uploaded_file)
     if text:
@@ -63,11 +61,13 @@ if uploaded_file:
         docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
         db = build_faiss(docs)
         st.success("✅ Knowledge Base ready!")
 # ------------------------------
 # Chat
 # ------------------------------
-query = st.text_input("💬 Ask a question about the uploaded document:")
 if query and db:
     retriever = db.as_retriever(search_kwargs={"k": 3})

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.schema import Document
 from sentence_transformers import SentenceTransformer
+from langchain_community.document_loaders import DirectoryLoader, TextLoader
 # ------------------------------
 # Title
 # ------------------------------
+st.title("📚 RAG For MSCI Indexes")
 # ------------------------------
+# Load pretrained model
 # ------------------------------
 @st.cache_resource
 def load_model():
     model = AutoModelForCausalLM.from_pretrained(model_name)
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
+with st.spinner("🔄 Loading Model..."):  # spinner message shown while load_model() runs
     generator = load_model()  # load_model is decorated with st.cache_resource and returns a text-generation pipeline
 # ------------------------------
 # File Upload
 # ------------------------------
+uploaded_file = "msci.txt"  # fixed relative path; this commit replaces the st.file_uploader widget with it
 # ------------------------------
 # Extract Text
 # ------------------------------
 def extract_text(file):  # `file` is now a path string (the caller passes "msci.txt"), not an uploaded file object
+    loader = TextLoader(file, encoding = "utf-8")  # open the path as UTF-8 text
+    return loader.load()[0].page_content  # text of the first Document the loader returns
+    # return "\n".join([page.extract_text() for page in pdf_reader.pages if page.extract_text()])  # NOTE(review): commented-out leftover from the removed PDF branch; pdf_reader is not defined in this function
 # ------------------------------
 # Build FAISS Index
 docs = []  # chunk Documents; stays empty when no text is extracted
 db = None  # result of build_faiss(); stays None when no text is extracted
+query = st.text_input("💬 Ask a question about MSCI Indexes:")  # question box, now created before the knowledge base is built
 if uploaded_file:  # always true while uploaded_file is the non-empty constant "msci.txt"
     text = extract_text(uploaded_file)
     if text:
         docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]  # `splitter` is defined on a line elided from this diff view
         db = build_faiss(docs)  # build_faiss is defined outside the visible hunks
         st.success("✅ Knowledge Base ready!")
+        st.info("You can ask any question regarding data feed to model is as below!")
+        long_text = st.text_area(text, height=150, disabled=True)  # NOTE(review): the first positional argument of st.text_area is the label, so the whole document is passed as the label and the box value stays at its default; value=text was presumably intended. long_text is not used in the visible lines.
 # ------------------------------
 # Chat
 # ------------------------------
 if query and db:
     retriever = db.as_retriever(search_kwargs={"k": 3})