Dhruv1102 committed on
Commit
6e272be
·
verified ·
1 Parent(s): 43b4631

Upload 7 files

Browse files
Files changed (7) hide show
  1. app.py +80 -0
  2. gpt_utils.py +15 -0
  3. pi_shard.py +26 -0
  4. pi_utils.py +23 -0
  5. pi_vector_utils.py +21 -0
  6. requirements.txt +6 -0
  7. style.css +6 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pi_shard import pi_shard, get_pi_digits
3
+ from gpt_utils import analyze_chunk
4
+ from pi_utils import random_pi_fact, generate_pi_graph
5
+ from pi_vector_utils import get_embedding, pi_rotation, pi_modulated_similarity
6
+ import fitz
7
+ import docx
8
+
9
# Streamlit entry point: upload a document, split it into π-sized chunks,
# optionally analyze a chunk with GPT, and answer questions by picking the
# chunk whose π-rotated embedding best matches the query.


def _read_uploaded_text(uploaded):
    """Return the plain text of an uploaded .txt/.pdf/.docx file.

    The extension check is case-insensitive. Returns None when the
    extension is not one of the three supported types, so the caller never
    works with an unbound ``text`` variable.
    """
    name = uploaded.name.lower()
    if name.endswith(".txt"):
        # Replace undecodable bytes instead of crashing on non-UTF-8 files.
        return uploaded.read().decode("utf-8", errors="replace")
    if name.endswith(".pdf"):
        # Context manager closes the PyMuPDF document once text is extracted.
        with fitz.open(stream=uploaded.read(), filetype="pdf") as pdf:
            return " ".join(page.get_text() for page in pdf)
    if name.endswith(".docx"):
        word_doc = docx.Document(uploaded)
        return "\n".join(para.text for para in word_doc.paragraphs)
    return None


def _best_chunk_index(user_query, chunks, openai_key):
    """Return the index of the chunk with the highest π-modulated similarity."""
    pi_digits = get_pi_digits(len(chunks))
    query_vec = get_embedding(user_query, openai_key)

    scores = []
    for i, chunk in enumerate(chunks):
        # Wrap around (or fall back to 0) so a short digits file cannot
        # raise IndexError when there are more chunks than digits.
        digit = pi_digits[i % len(pi_digits)] if pi_digits else 0
        chunk_vec = get_embedding(chunk, openai_key)
        rotated = pi_rotation(chunk_vec, digit)
        scores.append((i, pi_modulated_similarity(query_vec, rotated, digit)))

    scores.sort(key=lambda x: x[1], reverse=True)
    return scores[0][0]


st.set_page_config(page_title="Play with Pi", layout="wide")
st.title("🎲 Play with Pi - π-Based Chunking Engine")

st.sidebar.header("🔧 Controls")
openai_key = st.sidebar.text_input("OpenAI API Key", type="password")
uploaded_file = st.file_uploader("Upload a document", type=["txt", "pdf", "docx"])

if uploaded_file:
    text = _read_uploaded_text(uploaded_file)

    if text is None:
        st.error("Unsupported file type. Please upload a .txt, .pdf or .docx file.")
    else:
        st.subheader("📄 Original Document")
        # Only show an ellipsis when the preview is actually truncated.
        preview = text[:1000] + ("..." if len(text) > 1000 else "")
        st.text_area("Document Preview", preview, height=150)

        # Create π-based chunks
        chunks = pi_shard(text)
        st.subheader(f"🔍 π-Shards (Total: {len(chunks)})")

        if not chunks:
            # An empty document yields no chunks; indexing would fail below.
            st.warning("No text could be extracted from this document.")
        else:
            selected = st.selectbox("Select Chunk", range(len(chunks)))
            st.code(chunks[selected], language="markdown")

            # GPT Analysis of Selected Chunk
            if openai_key:
                st.markdown("#### ✨ GPT Analysis")
                if st.button("Analyze Selected Chunk"):
                    with st.spinner("Thinking like π..."):
                        result = analyze_chunk(chunks[selected], openai_key)
                    st.success("Done!")
                    st.markdown(result)

            # Question Answering Section
            st.markdown("#### 🤔 Ask a Question about the Document")
            user_query = st.text_area("Enter your question:", "")

            if openai_key and st.button("🚀 Submit"):
                if user_query:
                    st.info("Generating embeddings and rotating using π...")
                    top_index = _best_chunk_index(user_query, chunks, openai_key)

                    st.success(f"✅ Best π-Chunk Match (Chunk #{top_index})")
                    st.code(chunks[top_index])

                    # Analyze matched chunk with GPT
                    # NOTE(review): only the matched chunk is sent here; the
                    # user's question itself is not forwarded to analyze_chunk.
                    st.markdown("#### 📚 GPT Response to Query")
                    with st.spinner("Analyzing the matched chunk..."):
                        answer = analyze_chunk(chunks[top_index], openai_key)
                    st.markdown(answer)

# Sidebar - Pi facts and visualization
st.sidebar.subheader("🎲 Pi Fact")
st.sidebar.info(random_pi_fact())

if st.sidebar.button("🌀 Show π-Graph"):
    fig = generate_pi_graph()
    st.pyplot(fig)
gpt_utils.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+
3
def analyze_chunk(chunk, openai_key):
    """Summarize and analyze a text chunk with the OpenAI chat API.

    Args:
        chunk: Text sent to the model as the user message.
        openai_key: OpenAI API key supplied by the caller; it is the only
            credential used for the request.

    Returns:
        The model's reply text, or a string starting with ``"Error: "`` when
        the key is missing or the request fails.
    """
    if not openai_key:
        return "Error: OpenAI API key is missing."

    # Credentials come from the caller; never embed a secret in source code.
    openai.api_key = openai_key
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "Summarize and analyze the following chunk."},
                {"role": "user", "content": chunk},
            ],
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        # Callers render the returned string directly, so report the failure
        # as text instead of raising.
        return f"Error: {str(e)}"
pi_shard.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def clean_text(text):
    """Collapse every run of whitespace to one space and trim both ends."""
    single_spaced = re.sub(r'\s+', ' ', text)
    trimmed = single_spaced.strip()
    return trimmed
5
+
6
def get_pi_digits(n=100):
    """Return up to the first ``n`` digits of π as a list of ints.

    Digits are read from ``assets/pi_digits.txt``. Every non-digit character
    (decimal point, whitespace, line endings) is discarded *before* the
    result is truncated, so stray characters in the file do not reduce the
    number of digits returned.

    Args:
        n: Maximum number of digits to return.

    Returns:
        A list of ints in file order; shorter than ``n`` only when the file
        holds fewer digits.
    """
    with open('assets/pi_digits.txt', 'r', encoding='utf-8') as f:
        raw = f.read()
    # Accept ASCII digits only: str.isdigit() is also true for characters
    # such as superscripts, which int() cannot convert.
    digits = [int(ch) for ch in raw if '0' <= ch <= '9']
    return digits[:n]
10
+
11
def pi_shard(text, max_chunks=50):
    """Split ``text`` into overlapping chunks whose sizes follow π's digits.

    The text is whitespace-normalized first. Chunk ``k`` spans
    ``(digit_k + 1) * 50`` characters, and the following chunk starts
    ``digit_{k+1} * 5`` characters before the end of that span. Digits are
    reused cyclically. At most ``max_chunks`` chunks are produced.
    """
    cleaned = clean_text(text)
    digits = get_pi_digits()
    total = len(cleaned)

    shards = []
    start = 0
    step = 0
    while True:
        if start >= total or len(shards) >= max_chunks:
            break
        span = (digits[step % len(digits)] + 1) * 50
        backtrack = digits[(step + 1) % len(digits)] * 5
        shards.append(cleaned[start:start + span])
        start += span - backtrack
        step += 1

    return shards
pi_utils.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+
5
# Short trivia strings about π; one is picked at random for display.
pi_facts = [
    "π is irrational and never ends!",
    "π has been calculated to over 62 trillion digits!",
    "The symbol π was first used in 1706.",
    "You can’t express π as a fraction!",
    "March 14 (3/14) is Pi Day!",
]


def random_pi_fact():
    """Return one entry of ``pi_facts`` chosen uniformly at random."""
    fact = random.choice(pi_facts)
    return fact
15
+
16
def generate_pi_graph():
    """Plot the alternating-sign running sum of the first 500 digits of π.

    Digits are read from ``assets/pi_digits.txt``; non-digit characters are
    ignored. Digit ``i`` contributes ``+d`` when ``i`` is even and ``-d``
    when odd, and the cumulative sum is drawn as a line.

    Returns:
        The matplotlib figure containing the plot.
    """
    # Close the file deterministically instead of leaking the handle.
    with open('assets/pi_digits.txt', encoding='utf-8') as f:
        raw = f.read()

    # Only the first 500 digits are plotted, so parse no more than that.
    digits = [int(ch) for ch in raw if ch.isdigit()][:500]
    y = np.cumsum([(-1) ** i * d for i, d in enumerate(digits)])
    x = list(range(len(y)))

    fig, ax = plt.subplots()
    ax.plot(x, y)
    ax.set_title("π Waveform based on Digits")
    return fig
pi_vector_utils.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from numpy.linalg import norm
3
+ import openai
4
+
5
def get_embedding(text, openai_key):
    """Return the ``text-embedding-ada-002`` embedding of ``text``.

    Args:
        text: Text to embed.
        openai_key: OpenAI API key used for the request.

    Returns:
        A NumPy array with the embedding. On any failure (missing key, empty
        text, or an API error) a zero vector of length 1536 is returned so
        callers can continue; the failure is logged rather than hidden.
    """
    import logging  # local import: this module's header does not import logging

    logger = logging.getLogger(__name__)

    if not openai_key or not text:
        # The request cannot succeed without a key or input; skip the call.
        logger.warning("get_embedding: missing API key or empty text; returning zero vector")
        return np.zeros(1536)

    openai.api_key = openai_key
    try:
        result = openai.Embedding.create(model="text-embedding-ada-002", input=text)
        return np.array(result['data'][0]['embedding'])
    except Exception:
        # Best-effort fallback is kept, but the cause is recorded.
        logger.exception("get_embedding: embedding request failed; returning zero vector")
        return np.zeros(1536)  # Return zero vector on error
12
+
13
def pi_rotation(embedding, pi_digit):
    """Blend an embedding with a one-position shifted copy of itself.

    The blend angle is ``(pi_digit / 9) * π``. The original vector is scaled
    by the cosine of that angle and the copy rolled by one position is
    scaled by the sine.
    """
    angle = (pi_digit / 9) * np.pi
    shifted = np.roll(embedding, 1)
    return embedding * np.cos(angle) + shifted * np.sin(angle)
17
+
18
def pi_modulated_similarity(query_vec, chunk_vec, pi_digit):
    """Return cosine similarity scaled by a weight derived from a π digit.

    Args:
        query_vec: Embedding of the query.
        chunk_vec: Embedding of the chunk.
        pi_digit: Digit that selects the weight ``1 + (pi_digit % 5) * 0.1``.

    Returns:
        The weighted cosine similarity, or ``0.0`` when either vector has
        zero norm. Without that guard the division yields NaN, which makes
        sorting the scores unreliable.
    """
    denominator = norm(query_vec) * norm(chunk_vec)
    if denominator == 0:
        # Cosine similarity is undefined for a zero vector; score it as
        # "no similarity".
        return 0.0
    cosine_sim = np.dot(query_vec, chunk_vec) / denominator
    weight = 1 + (pi_digit % 5) * 0.1
    return cosine_sim * weight
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ openai==0.28
3
+ python-docx
4
+ PyMuPDF
5
+ matplotlib
6
+ numpy
style.css ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
/* Page background colour. */
body {
  background-color: #f0fff0;
}

/* Colour for first- and second-level headings. */
h1,
h2 {
  color: #0a0;
}