Spaces:

red1xe
/

codeGPT

Runtime error

App Files Files Community

red1xe committed on Aug 11, 2023

Commit

e9eaff4

1 Parent(s): 5beeb23

some changes

Browse files

Files changed (1) hide show

app.py +38 -7

app.py CHANGED Viewed

@@ -1,16 +1,47 @@
 import streamlit as st
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from pdfminer.high_level import extract_text
 # Page title, section header, and an uploader that accepts multiple PDF files.
 st.title("Embedding Creation for Langchain")
 st.header("File Upload")
 files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")
 # When at least one file is uploaded, a "Convert" button extracts each PDF's
 # text, replaces newlines with spaces, and writes the result to the page.
 if files:
-    st.header("PDFs to Text")
-    if st.button("Convert"):
-        for file in files:
-            text = extract_text(file)
-            full_text = text.replace("\n", " ")
-            st.write(full_text)

 import streamlit as st
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.memory import ConversationBufferMemory
+from langchain.llms import HuggingFaceHub
+from langchain.chains import RetrievalQA
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from pdfminer.high_level import extract_text
+def get_pdf_text(files):
+    """Extract and concatenate the text of several PDF files.
+
+    Args:
+        files: Iterable of PDF sources to hand to pdfminer's
+            ``extract_text`` (here: the uploads from ``st.file_uploader``).
+
+    Returns:
+        A single string with every file's text in the order given. Newlines
+        inside each file are replaced by spaces, and files are separated by
+        one space. Returns ``""`` when ``files`` is empty.
+    """
+    # Join in upload order with a separator so the last word of one document
+    # does not fuse with the first word of the next one.
+    return " ".join(extract_text(file).replace("\n", " ") for file in files)
 st.title("Embedding Creation for Langchain")
 st.header("File Upload")
 files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")
 if files:
+    st.header("Start Conversion")
+    if st.button("Ready!"):
+       with st.spinner("Creating chain..."):
+        full_text = get_pdf_text(files)
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
+        chunks = text_splitter.split_text(full_text)
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        vectorstore = FAISS.from_texts(chunks, embeddings)
+        memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True,)
+        llm = AutoModelForCausalLM.from_pretrained("red1xe/Llama-2-7B-codeGPT")
+        chain = RetrievalQA.from_chain_type(
+            llm=llm,
+            chain_type="retrieval-qa",
+            retriever=vectorstore.as_retriever(),
+            memory=memory,
+        )
+        st.success("Done!")
+        st.header("Start Chat")
+        st.subheader("Ask a question")
+        question = st.text_input("Question")
+        if st.button("Ask"):
+            with st.spinner("Thinking..."):
+                answer = chain.query(question)
+                st.success(answer)