YAMITEK committed on
Commit
72c89fc
·
verified ·
1 Parent(s): a02449a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import openai
4
+ from sentence_transformers import SentenceTransformer, util
5
+ import PyPDF2
6
+
7
+
8
# Read the OpenAI key from the environment so it is never stored in source.
# Note the variable is named "openapikey"; os.getenv returns None if it is unset.
openai.api_key = os.getenv("openapikey")


@st.cache_resource
def _load_embedding_model():
    """Build the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the resource avoids re-instantiating the model
    each time.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')


# Module-level handle used by the embedding helpers and the UI code below.
model = _load_embedding_model()
11
+
12
def load_pdf(uploaded_file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            bytes of the PDF (this is how the upload widget's result is
            used by the caller).

    Returns:
        The text of all pages concatenated in page order.
    """
    import io

    # Parse straight from memory. Writing to a fixed "temp.pdf" in the working
    # directory let concurrent sessions overwrite each other's upload and left
    # the file behind afterwards.
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))

    # Defensive `or ""`: a page that yields no text must not break the join.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
23
+
24
def chunk_text(text, chunk_size=500, overlap=100):
    """Split ``text`` into overlapping fixed-size character windows.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so
    each window repeats the last ``overlap`` characters of the previous one.
    The final window may be shorter than ``chunk_size``.

    Args:
        text: String to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        List of chunk strings in document order (empty for empty ``text``).

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # A non-positive stride would either make range() fail with an opaque
    # message (stride 0) or silently produce no chunks (negative stride).
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    stride = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), stride)]
29
+
30
+
31
def create_embeddings(chunks):
    """Encode ``chunks`` with the module-level embedding model.

    Args:
        chunks: The texts to embed, passed through to ``model.encode``.

    Returns:
        The result of ``model.encode`` with tensor output requested.
    """
    return model.encode(chunks, convert_to_tensor=True)
34
+
35
def find_relevant_chunks(query_embedding, chunk_embeddings, chunks, top_k=3):
    """Return the chunks most similar to the query, best match first.

    Args:
        query_embedding: Embedding of the query (only the first row of the
            similarity matrix is used).
        chunk_embeddings: Embeddings of ``chunks``, index-aligned with it.
        chunks: The chunk texts.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` entries of ``chunks`` ordered by descending cosine
        similarity.
    """
    similarities = util.pytorch_cos_sim(query_embedding, chunk_embeddings)[0]
    ranked_indices = sorted(
        range(len(similarities)),
        key=similarities.__getitem__,
        reverse=True,
    )
    return [chunks[idx] for idx in ranked_indices[:top_k]]
40
+
41
def generate_response(query, context):
    """Ask the chat model to answer ``query`` using ``context``.

    Args:
        query: The user's question.
        context: Text inserted into the prompt ahead of the question.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string if the reply carries no text content.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant that answers questions based on provided context."},
        {"role": "user", "content": f"Context: {context}\nQuestion: {query}"},
    ]
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",  # Or another suitable chat model
        messages=messages,
        max_tokens=200,
    )

    # The message content is nullable in the API; avoid calling .strip() on None.
    content = response.choices[0].message.content
    return (content or "").strip()
53
+
54
# --- Streamlit UI: upload a PDF, index it, then answer questions about it ---
st.title("Simple RAG Application (No LangChain)")
uploaded_file = st.file_uploader("Upload PDF", type="pdf")

if uploaded_file:
    with st.spinner("Processing PDF..."):
        pdf_text = load_pdf(uploaded_file)
        chunks = chunk_text(pdf_text)
        # Only embed when there is something to embed; an empty chunk list
        # (e.g. a PDF with no extractable text) cannot be searched.
        chunk_embeddings = create_embeddings(chunks) if chunks else None

    if not chunks:
        st.warning("No extractable text was found in this PDF.")
    else:
        query = st.text_input("Ask a question:")
        # Skip blank / whitespace-only input so no API call is made for it.
        if query and query.strip():
            query_embedding = model.encode([query], convert_to_tensor=True)
            relevant_chunks = find_relevant_chunks(query_embedding, chunk_embeddings, chunks)
            context = "\n".join(relevant_chunks)
            answer = generate_response(query, context)
            st.write("Answer:", answer)