File size: 3,910 Bytes
5cbae19
 
2c24a01
d54576a
c7d0bae
d54576a
2c24a01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fd2f6f
2c24a01
 
 
8fd2f6f
2c24a01
 
 
 
8fd2f6f
2c24a01
 
8fd2f6f
2c24a01
 
 
 
 
 
 
8fd2f6f
2c24a01
 
 
 
1bf21c8
2c24a01
b2c1899
1bf21c8
2c24a01
e135161
2c24a01
 
71c0e20
2c24a01
 
71c0e20
2c24a01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1bf21c8
 
d54576a
2c24a01
1bf21c8
 
 
 
2c24a01
4ddce82
1bf21c8
 
 
 
 
2c24a01
 
f7af5de
1bf21c8
 
 
2c24a01
1bf21c8
 
 
c7d0bae
1bf21c8
2c24a01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import os
import re

from langchain_openai import ChatOpenAI
from openai import OpenAI
import tempfile

# SECURITY: the OpenAI API key must never be committed to source control.
# A literal key used to be assigned to os.environ at this point; treat that
# key as leaked, revoke it, and provide a fresh one through the
# OPENAI_API_KEY environment variable (hosting platforms expose their
# configured secrets this way).
if not os.environ.get("OPENAI_API_KEY"):
    st.error(
        "OPENAI_API_KEY is not set. Configure it as an environment variable "
        "or app secret before starting the app."
    )
    # Halt this script run here so the client below is never built without a key.
    st.stop()

# OpenAI() picks the key up from the OPENAI_API_KEY environment variable.
client = OpenAI()

def simple_split(text, chunk_size=1000):
    """Split *text* into chunks made of whole sentences.

    Sentences are detected at whitespace that follows ``.``, ``!`` or ``?``.
    The terminating punctuation is kept with its sentence, and consecutive
    sentences are packed (joined by a single space) into one chunk as long as
    the chunk stays within ``chunk_size`` characters.

    Args:
        text: The document text to split.
        chunk_size: Maximum length of a chunk, in characters. A single
            sentence longer than this is not cut; it becomes its own
            oversized chunk.

    Returns:
        A list of chunk strings in document order. Empty or whitespace-only
        input yields an empty list.
    """
    # The lookbehind splits *after* the terminator, so "?" and "!" survive
    # and no artificial "." has to be re-appended.
    pieces = re.split(r'(?<=[.!?])\s+', text)
    sentences = [piece.strip() for piece in pieces if piece.strip()]

    chunks = []
    current = []      # sentences collected for the chunk being built
    current_len = 0   # length of " ".join(current)

    for sentence in sentences:
        # One extra character for the joining space when the chunk is non-empty.
        added = len(sentence) + (1 if current else 0)
        if current and current_len + added > chunk_size:
            chunks.append(" ".join(current))
            current = [sentence]
            current_len = len(sentence)
        else:
            current.append(sentence)
            current_len += added

    if current:
        chunks.append(" ".join(current))

    return chunks

def dynamic_rag(query, document_content):
    """Answer *query* using the most relevant chunks of *document_content*.

    The document is split with ``simple_split``; each chunk is scored by the
    number of distinct words it shares with the query, and the three
    best-scoring chunks are sent to the chat model as the only context.

    Args:
        query: The user's question.
        document_content: Full text of the document to search.

    Returns:
        The model's answer as a string (empty if the model returned no text).
    """
    chunks = simple_split(document_content)

    def tokenize(text):
        # Compare bare words: with a plain split(), "projects?" and
        # "Projects:" would never match because punctuation stays attached.
        return set(re.findall(r"\w+", text.lower()))

    query_words = tokenize(query)
    scored = [(len(query_words & tokenize(chunk)), chunk) for chunk in chunks]

    # list.sort is stable, so chunks with equal scores keep document order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    context = "\n".join(chunk for _, chunk in scored[:3])

    prompt = f"""Use ONLY this context from document:

{context}

Question: {query}

Answer using context only:"""

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    # The API may return a message with no text content; normalize to "".
    return response.choices[0].message.content or ""

# ---- Page header ----
st.title("🧠 Dynamic RAG Chatbot")
st.markdown("**Paste text or upload → Ask ANY question!**")

# ---- Document input: a .txt upload (left column) or pasted text (right) ----
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("📀 Upload TXT", type='txt')

with col2:
    pasted_text = st.text_area("📝 Or paste text here", height=150)

document_content = ""

# An uploaded file takes precedence over pasted text.
if uploaded_file is not None:
    try:
        # getvalue() returns the whole buffer regardless of the current read
        # position, so the content is still available on later script reruns.
        document_content = uploaded_file.getvalue().decode('utf-8')
    except UnicodeDecodeError:
        # Report a non-UTF-8 upload instead of crashing the app.
        st.error("Could not read the file: it is not valid UTF-8 text.")
    else:
        st.success("✅ TXT loaded!")
elif pasted_text:
    document_content = pasted_text
    st.success("✅ Text loaded!")

# Keep the document in session state so it survives the rerun that each chat
# message triggers.
if document_content:
    st.session_state.document_content = document_content
    st.success("🚀 Chatbot ready! Ask about your text.")

# ---- Chat UI: shown once a document has been stored in session state ----
if 'document_content' in st.session_state:
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the conversation so far; every script run redraws the page.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Handle a newly submitted question.
    if query := st.chat_input("💬 Ask about your document..."):
        st.session_state.messages.append({"role": "user", "content": query})
        with st.chat_message("user"):
            st.markdown(query)

        with st.chat_message("assistant"):
            with st.spinner("🔍 Searching document..."):
                response = dynamic_rag(query, st.session_state.document_content)
                st.markdown(response)

        st.session_state.messages.append({"role": "assistant", "content": response})

    # Reset the conversation; the loaded document is kept.
    if st.button("🗑️ Clear Chat"):
        st.session_state.messages = []
        st.rerun()

else:
    # No document yet: show usage instructions and a sample to try.
    st.info("👆 **Paste text or upload TXT to start chatting!**")
    st.markdown("""
    **Test example:**
    ```
    Skills: Python, DSA, AI/ML
    Projects: RAG Chatbot (live demo)
    LeetCode: 300 problems solved
    ```
    Ask: "What projects?" → Perfect answer!
    """)

# ---- Sidebar: short description of how the app works ----
# The bullets belong under the sidebar heading, so they are written to
# st.sidebar as well rather than to the main page.
st.sidebar.markdown("### 🛠️ Pure Python RAG")
st.sidebar.markdown("• Custom text splitter")
st.sidebar.markdown("• Keyword similarity")
st.sidebar.markdown("• OpenAI GPT-4o-mini")
st.sidebar.markdown("• Dynamic input")