Dinesh310 committed on
Commit
baa3fcb
·
verified ·
1 Parent(s): 638efb2

Delete Repo

Browse files
Repo/Demo_1/.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/.streamlit/config.toml DELETED
@@ -1,4 +0,0 @@
1
- [server]
2
- enableCORS = false
3
- enableXsrfProtection = false
4
- maxUploadSize = 200
 
 
 
 
 
Repo/Demo_1/Dockerfile DELETED
@@ -1,21 +0,0 @@
1
FROM python:3.11-slim

WORKDIR /app

# Build tools for wheels that compile from source; curl for the HEALTHCHECK;
# git for any VCS-pinned requirements. Clean the apt cache to keep the image small.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies BEFORE copying the application code so this
# (slow) layer stays cached until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip3 install -r requirements.txt

# Copy the application code (this already includes src/, so no separate
# COPY src/ line is needed).
COPY . .

EXPOSE 8501

HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/README.md DELETED
@@ -1,20 +0,0 @@
1
- ---
2
- title: Demo 1
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: for learning
12
- license: mit
13
- ---
14
-
15
- # Welcome to Streamlit!
16
-
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
-
19
- If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/requirements.txt DELETED
@@ -1,13 +0,0 @@
1
- langchain
2
- langchain-community
3
- sentence-transformers
4
- langchain-huggingface
5
- langchain-openai
6
- langgraph
7
- openai
8
- faiss-cpu
9
- pydantic
10
- python-dotenv
11
- requests
12
- streamlit
13
- pypdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/src/__init__.py DELETED
File without changes
Repo/Demo_1/src/config/__init__.py DELETED
File without changes
Repo/Demo_1/src/config/config.py DELETED
@@ -1,20 +0,0 @@
1
# src/config.py
"""Central configuration constants for the RAG application."""
import os

# --- Embedding model ---
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_DEVICE = "cpu"  # CPU-only deployment target
NORMALIZE_EMBEDDINGS = True

# --- LLM (served via OpenRouter) ---
LLM_MODEL = "openai/gpt-oss-120b:free"
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
# Read once at import time; None when the env var is unset — the LLM
# loader is responsible for rejecting a missing key.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# --- Text splitter ---
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

# --- Retriever ---
MMR_LAMBDA = 0.25  # MMR lambda_mult; lower values favour diversity
K_OFFSET = 2       # retriever k is computed as pdf_count + K_OFFSET
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/src/core/__init__.py DELETED
File without changes
Repo/Demo_1/src/core/embeddings.py DELETED
@@ -1,17 +0,0 @@
1
from langchain_huggingface import HuggingFaceEmbeddings
from src.config.config import (
    EMBEDDING_MODEL,
    EMBEDDING_DEVICE,
    NORMALIZE_EMBEDDINGS,
)


def load_embeddings():
    """Build the HuggingFace sentence-embedding model used for indexing.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured from
        ``src.config.config``.

    Raises:
        RuntimeError: if the underlying model cannot be loaded; the
            original exception is attached as ``__cause__``.
    """
    try:
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            model_kwargs={"device": EMBEDDING_DEVICE},
            encode_kwargs={"normalize_embeddings": NORMALIZE_EMBEDDINGS},
        )
    except Exception as e:
        # Chain the cause so the real model-load failure survives in tracebacks.
        raise RuntimeError(f"Failed to load embeddings: {e}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/src/core/graph_state.py DELETED
@@ -1,8 +0,0 @@
1
from typing import List, TypedDict
from langchain_core.documents import Document


class GraphState(TypedDict):
    """State dictionary threaded through the LangGraph workflow nodes."""

    question: str            # the user's current question
    context: List[Document]  # documents retrieved for the question
    answer: str              # the generated answer
 
 
 
 
 
 
 
 
 
Repo/Demo_1/src/core/llm.py DELETED
@@ -1,17 +0,0 @@
1
# src/llm.py
from langchain_openai import ChatOpenAI
from src.config.config import (
    LLM_MODEL,
    OPENROUTER_BASE_URL,
    OPENROUTER_API_KEY
)


def load_llm():
    """Create the chat-model client pointed at the OpenRouter API.

    Raises:
        EnvironmentError: when ``OPENROUTER_API_KEY`` is not set —
            checked up front so misconfiguration fails immediately
            rather than on the first request.
    """
    # Guard clause: without a key every downstream call would fail anyway.
    if not OPENROUTER_API_KEY:
        raise EnvironmentError("OPENROUTER_API_KEY not set")

    client = ChatOpenAI(
        model=LLM_MODEL,
        base_url=OPENROUTER_BASE_URL,
        api_key=OPENROUTER_API_KEY
    )
    return client
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/src/exceptions.py DELETED
@@ -1,8 +0,0 @@
1
class RAGError(Exception):
    """Base class for all application-specific errors.

    Catching ``RAGError`` handles any failure raised by this package
    while still letting unrelated exceptions propagate.
    """


class DocumentProcessingError(RAGError):
    """PDF loading, splitting, or indexing failed."""


class VectorStoreNotInitializedError(RAGError):
    """A query was attempted before any documents were processed."""


class LLMInvocationError(RAGError):
    """The language-model call failed."""
 
 
 
 
 
 
 
 
 
Repo/Demo_1/src/rag_graph.py DELETED
@@ -1,92 +0,0 @@
1
# src/rag_graph.py
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.prompts import ChatPromptTemplate

from src.core.graph_state import GraphState
from src.core.embeddings import load_embeddings
from src.core.llm import load_llm
from src.vector_store.vector_store import build_vector_store
from src.config.config import K_OFFSET, MMR_LAMBDA
from src.exceptions import VectorStoreNotInitializedError, LLMInvocationError


class ProjectRAGGraph:
    """Two-node LangGraph RAG pipeline: retrieve (FAISS/MMR) then generate."""

    def __init__(self):
        self.embeddings = load_embeddings()
        self.llm = load_llm()
        self.vector_store = None   # set by process_documents()
        self.pdf_count = 0         # used to scale the retriever's k
        self.memory = MemorySaver()  # per-thread conversation checkpoints
        self.workflow = self._build_graph()

    def process_documents(self, pdf_paths, original_names=None):
        """Index the given PDFs into a fresh FAISS vector store.

        Args:
            pdf_paths: filesystem paths of the PDFs to index.
            original_names: optional display names recorded as each
                document's ``source`` metadata (used for citations).
        """
        self.pdf_count = len(pdf_paths)
        self.vector_store = build_vector_store(
            pdf_paths,
            self.embeddings,
            original_names
        )

    # ---------- Graph Nodes ----------

    def retrieve(self, state: GraphState):
        """Graph node: fetch context documents for the current question.

        Raises:
            VectorStoreNotInitializedError: if called before any
                documents were indexed via process_documents().
        """
        if not self.vector_store:
            raise VectorStoreNotInitializedError("Vector store not initialized")

        # Scale k with the number of indexed PDFs so each file has a chance
        # to contribute a chunk; clamp to at least 1.
        k_value = max(1, self.pdf_count + K_OFFSET)

        retriever = self.vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": k_value, "lambda_mult": MMR_LAMBDA}
        )

        documents = retriever.invoke(state["question"])
        return {"context": documents}

    def generate(self, state: GraphState):
        """Graph node: answer the question using only the retrieved context.

        Raises:
            LLMInvocationError: if the model call fails; the original
                exception is chained as ``__cause__``.
        """
        try:
            prompt = ChatPromptTemplate.from_template(
                """
                You are an expert Project Analyst.
                Answer ONLY using the provided context.
                If the answer is not present, say "I don't know".

                Context:
                {context}

                Question:
                {question}
                """
            )

            formatted_context = "\n\n".join(
                doc.page_content for doc in state["context"]
            )

            chain = prompt | self.llm
            response = chain.invoke({
                "context": formatted_context,
                "question": state["question"]
            })

            return {"answer": response.content}

        except Exception as e:
            # Chain the cause so the underlying API failure is preserved.
            raise LLMInvocationError(f"LLM failed: {e}") from e

    # ---------- Graph Build ----------

    def _build_graph(self):
        """Compile the retrieve -> generate workflow with checkpointing."""
        workflow = StateGraph(GraphState)
        workflow.add_node("retrieve", self.retrieve)
        workflow.add_node("generate", self.generate)
        workflow.set_entry_point("retrieve")
        workflow.add_edge("retrieve", "generate")
        workflow.add_edge("generate", END)
        return workflow.compile(checkpointer=self.memory)

    def query(self, question: str, thread_id: str):
        """Run the full pipeline for one question on the given thread."""
        config = {"configurable": {"thread_id": thread_id}}
        result = self.workflow.invoke({"question": question}, config=config)
        return result["answer"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/src/vector_store/vector_store.py DELETED
@@ -1,31 +0,0 @@
1
# src/vector_store.py
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from src.config.config import CHUNK_SIZE, CHUNK_OVERLAP
from src.exceptions import DocumentProcessingError


def build_vector_store(pdf_paths, embeddings, original_names=None):
    """Load PDFs, split them into chunks, and index them in FAISS.

    Args:
        pdf_paths: filesystem paths of the PDFs to load.
        embeddings: the embedding model used to vectorize chunks.
        original_names: optional display names; when given, entry *i*
            overrides the ``source`` metadata of every page from
            ``pdf_paths[i]`` (so citations show the uploaded filename,
            not a temp path).

    Returns:
        A FAISS vector store built from all split documents.

    Raises:
        DocumentProcessingError: if any load/split/index step fails;
            the original exception is chained as ``__cause__``.
    """
    try:
        all_docs = []

        for i, path in enumerate(pdf_paths):
            loader = PyPDFLoader(path)
            docs = loader.load()

            if original_names and i < len(original_names):
                for doc in docs:
                    doc.metadata["source"] = original_names[i]

            all_docs.extend(docs)

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP
        )

        splits = splitter.split_documents(all_docs)
        return FAISS.from_documents(splits, embeddings)

    except Exception as e:
        # Chain the cause so the original parsing/indexing error survives.
        raise DocumentProcessingError(f"PDF processing failed: {e}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Repo/Demo_1/streamlit_app.py DELETED
@@ -1,121 +0,0 @@
1
"""Streamlit front-end for the Project Analyst RAG pipeline.

Flow: upload PDFs in the sidebar -> index them into the RAG graph ->
chat; each assistant answer shows its source citations in an expander.
"""
import streamlit as st
import os
import tempfile

from src.rag_graph import ProjectRAGGraph

# --- Page Config ---
st.set_page_config(page_title="Project Analyst RAG", layout="wide")
st.title("📄 Professional Project Analyst Chat")

# --- Initialize Session State ---
if "rag_graph" not in st.session_state:
    st.session_state.rag_graph = ProjectRAGGraph()
if "messages" not in st.session_state:
    st.session_state.messages = []
if "thread_id" not in st.session_state:
    # Hardcoded for demo, could be unique per session.
    st.session_state.thread_id = "default_user_1"

# --- Sidebar: File Upload ---
with st.sidebar:
    st.header("Upload Documents")
    uploaded_files = st.file_uploader(
        "Upload Project PDFs",
        type="pdf",
        accept_multiple_files=True
    )

    process_button = st.button("Process Documents")

    if process_button and uploaded_files:
        with st.spinner("Processing PDFs..."):
            pdf_paths = []
            original_names = []  # real filenames, preserved for citations
            try:
                for uploaded_file in uploaded_files:
                    original_names.append(uploaded_file.name)
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                        tmp.write(uploaded_file.getvalue())
                        pdf_paths.append(tmp.name)

                # Pass BOTH the temp paths and the original display names.
                st.session_state.rag_graph.process_documents(
                    pdf_paths,
                    original_names=original_names
                )
            finally:
                # Always remove temp files, even if indexing raised.
                for path in pdf_paths:
                    try:
                        os.remove(path)
                    except OSError:
                        pass  # best-effort cleanup; file may already be gone
            st.success("Documents Indexed Successfully!")

# --- Chat Interface ---
# Replay the stored conversation, including per-message citations.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
        if "citations" in message and message["citations"]:
            with st.expander("View Sources"):
                for doc in message["citations"]:
                    st.caption(f"Source: {doc.metadata.get('source', 'Unknown')} - Page: {doc.metadata.get('page', 'N/A')}")
                    st.write(f"_{doc.page_content[:200]}..._")

# User Input
if prompt := st.chat_input("Ask a question about your projects..."):
    # Guard: the graph cannot retrieve before documents are indexed.
    if st.session_state.rag_graph.vector_store is None:
        st.error("Please upload and process documents first!")
    else:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate Response using the Graph
        with st.chat_message("assistant"):
            with st.spinner("Analyzing..."):
                # Invoke the compiled workflow directly (not .query()) so we
                # also get the retrieved context back for citations.
                config = {"configurable": {"thread_id": st.session_state.thread_id}}
                inputs = {"question": prompt}
                result = st.session_state.rag_graph.workflow.invoke(inputs, config=config)

                answer = result["answer"]
                context = result["context"]  # retrieved Document objects

                st.markdown(answer)

                # Citations section
                if context:
                    with st.expander("View Sources"):
                        for doc in context:
                            source_name = os.path.basename(doc.metadata.get('source', 'Unknown'))
                            # NOTE(review): assumes 'page' metadata is 0-based
                            # (PyPDFLoader convention) — displayed 1-based.
                            page_num = doc.metadata.get('page', 0) + 1
                            st.caption(f"📄 {source_name} (Page {page_num})")
                            st.write(f"_{doc.page_content[:300]}..._")

                # Persist the assistant turn so citations survive reruns.
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": answer,
                    "citations": context
                })