Mummia-99 commited on
Commit
6459544
·
verified ·
1 Parent(s): 02cc2b1

Rename rag_app.py to app.py

Browse files
Files changed (1) hide show
  1. rag_app.py → app.py +68 -68
rag_app.py → app.py RENAMED
@@ -1,69 +1,69 @@
1
- import streamlit as st
2
- import os
3
- import openai
4
- from sentence_transformers import SentenceTransformer, util
5
- import PyPDF2
6
-
7
-
8
- openai.api_key = "sk-***REDACTED***"  # SECURITY: a live OpenAI API key was committed on this line — revoke/rotate it immediately; deleting it in a later commit does NOT remove it from git history
9
-
10
- model = SentenceTransformer('all-MiniLM-L6-v2')
11
-
12
- def load_pdf(uploaded_file):
13
- with open("temp.pdf", "wb") as f:
14
- f.write(uploaded_file.getvalue())
15
-
16
- with open("temp.pdf", 'rb') as file:
17
- pdf_reader = PyPDF2.PdfReader(file)
18
- text = ""
19
- for page_num in range(len(pdf_reader.pages)):
20
- page = pdf_reader.pages[page_num]
21
- text += page.extract_text()
22
- return text
23
-
24
- def chunk_text(text, chunk_size=500, overlap=100):
25
- chunks = []
26
- for i in range(0, len(text), chunk_size - overlap):
27
- chunks.append(text[i:i + chunk_size])
28
- return chunks
29
-
30
-
31
- def create_embeddings(chunks):
32
- embeddings = model.encode(chunks, convert_to_tensor=True)
33
- return embeddings
34
-
35
- def find_relevant_chunks(query_embedding, chunk_embeddings, chunks, top_k=3):
36
- cosine_scores = util.pytorch_cos_sim(query_embedding, chunk_embeddings)[0]
37
- top_results = sorted(range(len(cosine_scores)), key=lambda i: cosine_scores[i], reverse=True)[:top_k]
38
- relevant_chunks = [chunks[i] for i in top_results]
39
- return relevant_chunks
40
-
41
- def generate_response(query, context):
42
- messages = [
43
- {"role": "system", "content": "You are a helpful assistant that answers questions based on provided context."},
44
- {"role": "user", "content": f"Context: {context}\nQuestion: {query}"}
45
- ]
46
- response = openai.chat.completions.create(
47
- model="gpt-3.5-turbo", # Or another suitable chat model
48
- messages=messages,
49
- max_tokens=200
50
- )
51
-
52
- return response.choices[0].message.content.strip()
53
-
54
- st.title("Simple RAG Application (No LangChain)")
55
- uploaded_file = st.file_uploader("Upload PDF", type="pdf")
56
-
57
- if uploaded_file:
58
- with st.spinner("Processing PDF..."):
59
- pdf_text = load_pdf(uploaded_file)
60
- chunks = chunk_text(pdf_text)
61
- chunk_embeddings = create_embeddings(chunks)
62
-
63
- query = st.text_input("Ask a question:")
64
- if query:
65
- query_embedding = model.encode([query], convert_to_tensor=True)
66
- relevant_chunks = find_relevant_chunks(query_embedding, chunk_embeddings, chunks)
67
- context = "\n".join(relevant_chunks)
68
- answer = generate_response(query, context)
69
  st.write("Answer:", answer)
 
1
import io
import os

import openai
import PyPDF2
import streamlit as st
from sentence_transformers import SentenceTransformer, util
6
+
7
+
8
+ openai.api_key = os.getenv("openapikey")
9
+
10
+ model = SentenceTransformer('all-MiniLM-L6-v2')
11
+
12
+ def load_pdf(uploaded_file):
13
+ with open("temp.pdf", "wb") as f:
14
+ f.write(uploaded_file.getvalue())
15
+
16
+ with open("temp.pdf", 'rb') as file:
17
+ pdf_reader = PyPDF2.PdfReader(file)
18
+ text = ""
19
+ for page_num in range(len(pdf_reader.pages)):
20
+ page = pdf_reader.pages[page_num]
21
+ text += page.extract_text()
22
+ return text
23
+
24
+ def chunk_text(text, chunk_size=500, overlap=100):
25
+ chunks = []
26
+ for i in range(0, len(text), chunk_size - overlap):
27
+ chunks.append(text[i:i + chunk_size])
28
+ return chunks
29
+
30
+
31
+ def create_embeddings(chunks):
32
+ embeddings = model.encode(chunks, convert_to_tensor=True)
33
+ return embeddings
34
+
35
+ def find_relevant_chunks(query_embedding, chunk_embeddings, chunks, top_k=3):
36
+ cosine_scores = util.pytorch_cos_sim(query_embedding, chunk_embeddings)[0]
37
+ top_results = sorted(range(len(cosine_scores)), key=lambda i: cosine_scores[i], reverse=True)[:top_k]
38
+ relevant_chunks = [chunks[i] for i in top_results]
39
+ return relevant_chunks
40
+
41
+ def generate_response(query, context):
42
+ messages = [
43
+ {"role": "system", "content": "You are a helpful assistant that answers questions based on provided context."},
44
+ {"role": "user", "content": f"Context: {context}\nQuestion: {query}"}
45
+ ]
46
+ response = openai.chat.completions.create(
47
+ model="gpt-3.5-turbo", # Or another suitable chat model
48
+ messages=messages,
49
+ max_tokens=200
50
+ )
51
+
52
+ return response.choices[0].message.content.strip()
53
+
54
+ st.title("Simple RAG Application (No LangChain)")
55
+ uploaded_file = st.file_uploader("Upload PDF", type="pdf")
56
+
57
+ if uploaded_file:
58
+ with st.spinner("Processing PDF..."):
59
+ pdf_text = load_pdf(uploaded_file)
60
+ chunks = chunk_text(pdf_text)
61
+ chunk_embeddings = create_embeddings(chunks)
62
+
63
+ query = st.text_input("Ask a question:")
64
+ if query:
65
+ query_embedding = model.encode([query], convert_to_tensor=True)
66
+ relevant_chunks = find_relevant_chunks(query_embedding, chunk_embeddings, chunks)
67
+ context = "\n".join(relevant_chunks)
68
+ answer = generate_response(query, context)
69
  st.write("Answer:", answer)