amn-sdqi commited on
Commit
5c6086a
Β·
verified Β·
1 Parent(s): 676f2da

Upload 7 files

Browse files
Files changed (7) hide show
  1. .env +1 -0
  2. .gitattributes +35 -35
  3. .gitignore +2 -0
  4. README.md +12 -12
  5. app.py +87 -87
  6. app_bkp.py +68 -68
  7. requirements.txt +12 -11
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ GOOGLE_API_KEY = "REDACTED-LEAKED-KEY-REVOKE-AND-ROTATE"  # SECURITY: a real API key was committed here in a public repo; it remains in git history — revoke it in Google Cloud Console and load a fresh key from the environment instead of committing .env
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ .gitignore
README.md CHANGED
@@ -1,12 +1,12 @@
1
- ---
2
- title: Chatbot
3
- emoji: πŸ†
4
- colorFrom: pink
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.44.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Chatbot
3
+ emoji: πŸ†
4
+ colorFrom: pink
5
+ colorTo: red
6
+ sdk: streamlit
7
+ sdk_version: 1.44.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,87 +1,87 @@
1
- import streamlit as st
2
- from langchain.document_loaders import PyPDFLoader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain.vectorstores import Chroma
5
- from langchain.embeddings import HuggingFaceEmbeddings
6
- from langchain.chains import RetrievalQA
7
- from langchain_google_genai import ChatGoogleGenerativeAI
8
- import tempfile
9
- import os
10
- from dotenv import load_dotenv
11
- from pydantic import SecretStr
12
-
13
-
14
- load_dotenv()
15
- GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
16
-
17
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
18
-
19
- # ---------------------------- SETUP ----------------------------
20
-
21
- st.title("πŸ“„ LangChain RAG Chatbot")
22
-
23
- # Session state
24
- if "chat_history" not in st.session_state:
25
- st.session_state.chat_history = []
26
-
27
- if "qa_chain" not in st.session_state:
28
- st.session_state.qa_chain = None
29
-
30
- # ---------------------------- FILE UPLOAD ----------------------------
31
-
32
- st.subheader("Upload your PDF")
33
- pdf_file = st.file_uploader("Upload", type="pdf")
34
-
35
- if pdf_file is not None and st.session_state.qa_chain is None:
36
- with st.spinner("πŸ” Processing document..."):
37
- # Save file temporarily
38
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
39
- tmp_file.write(pdf_file.read())
40
- tmp_path = tmp_file.name
41
-
42
- # Load and split PDF
43
- loader = PyPDFLoader(tmp_path)
44
- documents = loader.load_and_split()
45
-
46
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
47
- chunks = splitter.split_documents(documents)
48
-
49
- # Vector store
50
-
51
- vectordb = Chroma.from_documents(
52
- chunks, embeddings, persist_directory="./chroma_db"
53
- )
54
- retriever = vectordb.as_retriever()
55
-
56
- # QA Chain
57
- llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=SecretStr(GOOGLE_API_KEY) if GOOGLE_API_KEY else None)
58
- qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
59
-
60
- # Store in session
61
- st.session_state.qa_chain = qa_chain
62
- st.success("βœ… Document processed and indexed!")
63
-
64
- # ---------------------------- CHAT ----------------------------
65
-
66
- if st.session_state.qa_chain:
67
- st.subheader("πŸ’¬ Ask a question")
68
-
69
- question = st.text_input("You:", key="user_input")
70
-
71
- if question:
72
- with st.spinner("πŸ€– Generating answer..."):
73
- answer = st.session_state.qa_chain.run(question)
74
- st.session_state.chat_history.append({"user": question, "bot": answer})
75
-
76
- # Display chat history
77
- for chat in st.session_state.chat_history:
78
- st.markdown(f"πŸ§‘ **You:** {chat['user']}")
79
- st.markdown(f"πŸ€– **Bot:** {chat['bot']}")
80
-
81
- # Reset button
82
- if st.button("πŸ”„ Reset Chat"):
83
- st.session_state.chat_history = []
84
- st.session_state.qa_chain = None
85
- st.rerun()
86
- else:
87
- st.info("πŸ“‚ Please upload a PDF to begin.")
 
1
+ import streamlit as st
2
+ from langchain.document_loaders import PyPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.chains import RetrievalQA
7
+ from langchain_google_genai import ChatGoogleGenerativeAI
8
+ import tempfile
9
+ import os
10
+ from dotenv import load_dotenv
11
+ from pydantic import SecretStr
12
+
13
+
14
+ load_dotenv()
15
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
16
+
17
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
18
+
19
+ # ---------------------------- SETUP ----------------------------
20
+
21
+ st.title("πŸ“„ LangChain RAG Chatbot")
22
+
23
+ # Session state
24
+ if "chat_history" not in st.session_state:
25
+ st.session_state.chat_history = []
26
+
27
+ if "qa_chain" not in st.session_state:
28
+ st.session_state.qa_chain = None
29
+
30
+ # ---------------------------- FILE UPLOAD ----------------------------
31
+
32
+ st.subheader("Upload your PDF")
33
+ pdf_file = st.file_uploader("Upload", type="pdf")
34
+
35
+ if pdf_file is not None and st.session_state.qa_chain is None:
36
+ with st.spinner("πŸ” Processing document..."):
37
+ # Save file temporarily
38
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
39
+ tmp_file.write(pdf_file.read())
40
+ tmp_path = tmp_file.name
41
+
42
+ # Load and split PDF
43
+ loader = PyPDFLoader(tmp_path)
44
+ documents = loader.load_and_split()
45
+
46
+ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
47
+ chunks = splitter.split_documents(documents)
48
+
49
+ # Vector store
50
+
51
+ vectordb = Chroma.from_documents(
52
+ chunks, embeddings, persist_directory="./chroma_db"
53
+ )
54
+ retriever = vectordb.as_retriever()
55
+
56
+ # QA Chain
57
+ llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=SecretStr(GOOGLE_API_KEY) if GOOGLE_API_KEY else None)
58
+ qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
59
+
60
+ # Store in session
61
+ st.session_state.qa_chain = qa_chain
62
+ st.success("βœ… Document processed and indexed!")
63
+
64
+ # ---------------------------- CHAT ----------------------------
65
+
66
+ if st.session_state.qa_chain:
67
+ st.subheader("πŸ’¬ Ask a question")
68
+
69
+ question = st.text_input("You:", key="user_input")
70
+
71
+ if question:
72
+ with st.spinner("πŸ€– Generating answer..."):
73
+ answer = st.session_state.qa_chain.run(question)
74
+ st.session_state.chat_history.append({"user": question, "bot": answer})
75
+
76
+ # Display chat history
77
+ for chat in st.session_state.chat_history:
78
+ st.markdown(f"πŸ§‘ **You:** {chat['user']}")
79
+ st.markdown(f"πŸ€– **Bot:** {chat['bot']}")
80
+
81
+ # Reset button
82
+ if st.button("πŸ”„ Reset Chat"):
83
+ st.session_state.chat_history = []
84
+ st.session_state.qa_chain = None
85
+ st.rerun()
86
+ else:
87
+ st.info("πŸ“‚ Please upload a PDF to begin.")
app_bkp.py CHANGED
@@ -1,68 +1,68 @@
1
- import streamlit as st
2
- from transformers import pipeline
3
- import fitz
4
-
5
- qa = pipeline("question-answering", model="deepset/roberta-base-squad2", device=0)
6
- text_gen = pipeline("text2text-generation", model="google/flan-t5-base", device=0)
7
-
8
-
9
- # extract text from uploaded document
10
- def extract_PDF(file):
11
- text = ""
12
- with fitz.open(stream=file.read(), filetype="pdf") as doc:
13
- for page in doc:
14
- text += page.get_text() # type: ignore
15
- return text
16
-
17
-
18
- # ------------------------------------------------------------------------------
19
-
20
- # -----------------------------------Streamlit UI--------------------------------
21
-
22
- st.title("Chatbot with Huggingface")
23
-
24
- st.subheader("Upload file")
25
- pdf_file = st.file_uploader("Upload", type="pdf")
26
-
27
- # Initialize Session state for convo history
28
-
29
- if "chat_history" not in st.session_state:
30
- st.session_state.chat_history = []
31
-
32
- if "context" not in st.session_state:
33
- st.session_state.context = None
34
-
35
- # extract text and store in the session
36
- if pdf_file is not None and st.session_state.context is None:
37
- st.session_state.context = extract_PDF(pdf_file)
38
-
39
-
40
- # Chat section
41
-
42
- if st.session_state.context:
43
- st.subheader("Chat with the PDF")
44
-
45
- question = st.text_input("You", key="user_input")
46
-
47
-
48
- if question:
49
- result = qa(question=question, context=st.session_state.context) # type: ignore
50
-
51
- context_chunk = st.session_state.context[:1500]
52
- prompt = f"Context: {context_chunk}\nQuestion: {question}\nAnswer:"
53
-
54
- generated = text_gen(prompt, max_length=100)[0]['generated_text'] # type: ignore
55
-
56
- # save convo
57
- st.session_state.chat_history.append(
58
- {"user": question, "bot": generated}
59
- )
60
-
61
- # Display chat
62
-
63
- for chat in st.session_state.chat_history:
64
- st.markdown(f"**You:** {chat['user']}")
65
- st.markdown(f"**Bot:** {chat['bot']}")
66
-
67
- else:
68
- st.info("Please upload PDF to begin")
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import fitz
4
+
5
+ qa = pipeline("question-answering", model="deepset/roberta-base-squad2", device=0)
6
+ text_gen = pipeline("text2text-generation", model="google/flan-t5-base", device=0)
7
+
8
+
9
+ # extract text from uploaded document
10
+ def extract_PDF(file):
11
+ text = ""
12
+ with fitz.open(stream=file.read(), filetype="pdf") as doc:
13
+ for page in doc:
14
+ text += page.get_text() # type: ignore
15
+ return text
16
+
17
+
18
+ # ------------------------------------------------------------------------------
19
+
20
+ # -----------------------------------Streamlit UI--------------------------------
21
+
22
+ st.title("Chatbot with Huggingface")
23
+
24
+ st.subheader("Upload file")
25
+ pdf_file = st.file_uploader("Upload", type="pdf")
26
+
27
+ # Initialize Session state for convo history
28
+
29
+ if "chat_history" not in st.session_state:
30
+ st.session_state.chat_history = []
31
+
32
+ if "context" not in st.session_state:
33
+ st.session_state.context = None
34
+
35
+ # extract text and store in the session
36
+ if pdf_file is not None and st.session_state.context is None:
37
+ st.session_state.context = extract_PDF(pdf_file)
38
+
39
+
40
+ # Chat section
41
+
42
+ if st.session_state.context:
43
+ st.subheader("Chat with the PDF")
44
+
45
+ question = st.text_input("You", key="user_input")
46
+
47
+
48
+ if question:
49
+ result = qa(question=question, context=st.session_state.context) # type: ignore
50
+
51
+ context_chunk = st.session_state.context[:1500]
52
+ prompt = f"Context: {context_chunk}\nQuestion: {question}\nAnswer:"
53
+
54
+ generated = text_gen(prompt, max_length=100)[0]['generated_text'] # type: ignore
55
+
56
+ # save convo
57
+ st.session_state.chat_history.append(
58
+ {"user": question, "bot": generated}
59
+ )
60
+
61
+ # Display chat
62
+
63
+ for chat in st.session_state.chat_history:
64
+ st.markdown(f"**You:** {chat['user']}")
65
+ st.markdown(f"**Bot:** {chat['bot']}")
66
+
67
+ else:
68
+ st.info("Please upload PDF to begin")
requirements.txt CHANGED
@@ -1,12 +1,13 @@
1
- streamlit
2
- openai
3
- langchain-google-genai
4
- langchain-core
5
- langchain-text-splitters
6
- transformers
7
- tf-keras
8
- langchain
9
- chromadb
10
- tiktoken
11
- pypdf
 
12
  sentence-transformers
 
1
+ streamlit
2
+ openai
3
+ langchain-google-genai
4
+ langchain-core
5
+ langchain-community
6
+ langchain-text-splitters
7
+ transformers
8
+ tf-keras
9
+ langchain
10
+ chromadb
11
+ tiktoken
12
+ pypdf
13
  sentence-transformers