Spaces:
Sleeping
Sleeping
Akshay Kumar BM
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,20 +16,22 @@ from dotenv import load_dotenv
|
|
| 16 |
|
| 17 |
class ContentProcessor:
|
| 18 |
def __init__(self):
|
| 19 |
-
#load_dotenv()
|
| 20 |
-
#self.configure_environment()
|
| 21 |
self.configure_streamlit()
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
|
| 25 |
-
os.environ['LANGCHAIN_TRACING_V2'] = "true"
|
| 26 |
-
os.environ['LANGCHAIN_PROJECT'] = "LangChain: Process Content from Multiple Sources"
|
| 27 |
-
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
|
| 28 |
|
| 29 |
def configure_streamlit(self):
|
| 30 |
st.set_page_config(page_title="LangChain: Process Content from Multiple Sources", page_icon="🦜")
|
| 31 |
st.title("🦜 LangChain: Process Content from Multiple Sources")
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
def calculate_chunk_size(self, text_length: int, model_context_length: int) -> int:
|
| 34 |
target_chunk_size = model_context_length // 3
|
| 35 |
return max(1000, min(target_chunk_size, model_context_length // 2))
|
|
@@ -37,7 +39,11 @@ class ContentProcessor:
|
|
| 37 |
def get_configuration(self) -> Dict[str, Any]:
|
| 38 |
with st.sidebar:
|
| 39 |
st.header("Configuration")
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
model = st.selectbox("Select Model", ["llama3-8b-8192", "gemma2-9b-it", "mixtral-8x7b-32768"])
|
| 42 |
|
| 43 |
st.header("Task")
|
|
@@ -74,6 +80,7 @@ class ContentProcessor:
|
|
| 74 |
os.unlink(temp_file_path)
|
| 75 |
return pdf_pages
|
| 76 |
|
|
|
|
| 77 |
def process_content(self, sources: Dict[str, Any]) -> List[Document]:
|
| 78 |
all_docs = []
|
| 79 |
|
|
@@ -115,7 +122,7 @@ class ContentProcessor:
|
|
| 115 |
os.unlink(temp_file_path)
|
| 116 |
|
| 117 |
if 'text' in sources and sources['text']:
|
| 118 |
-
with tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".txt") as temp_file:
|
| 119 |
temp_file.write(sources['text'])
|
| 120 |
temp_file_path = temp_file.name
|
| 121 |
|
|
@@ -168,7 +175,9 @@ class ContentProcessor:
|
|
| 168 |
refine_prompt=prompts["refine_prompt"]
|
| 169 |
)
|
| 170 |
|
| 171 |
-
|
|
|
|
|
|
|
| 172 |
|
| 173 |
def create_retriever(self, docs: List[Document]) -> FAISS:
|
| 174 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
|
@@ -193,14 +202,12 @@ class ContentProcessor:
|
|
| 193 |
action = "Answer questions about the content"
|
| 194 |
|
| 195 |
process_button = st.button("Process Content")
|
| 196 |
-
|
| 197 |
-
if 'docs' not in st.session_state:
|
| 198 |
-
st.session_state.docs = None
|
| 199 |
-
if 'retriever' not in st.session_state:
|
| 200 |
-
st.session_state.retriever = None
|
| 201 |
|
| 202 |
if process_button:
|
| 203 |
-
if
|
|
|
|
|
|
|
|
|
|
| 204 |
st.error("Please provide your Groq API Key in the sidebar.")
|
| 205 |
elif not sources:
|
| 206 |
st.error("Please select at least one source type and provide content.")
|
|
@@ -220,6 +227,7 @@ class ContentProcessor:
|
|
| 220 |
else: # Interactive Q&A
|
| 221 |
st.session_state.retriever = self.create_retriever(st.session_state.docs)
|
| 222 |
st.success("Document processed and ready for questions!")
|
|
|
|
| 223 |
|
| 224 |
if config['task'] == "Interactive Q&A" and st.session_state.retriever is not None:
|
| 225 |
question = st.text_input("Ask a question about the document:")
|
|
|
|
| 16 |
|
| 17 |
class ContentProcessor:
|
| 18 |
def __init__(self):
    """Set up the Streamlit page and per-session state for the processor.

    The default Groq API key (used for the free-tier actions before the
    user must supply their own) is read from the GROQ_API_KEY environment
    variable. SECURITY: a previous revision committed a literal `gsk_...`
    key here — that key is leaked to anyone with read access to the repo
    and must be rotated; never hard-code secrets in source.
    """
    self.configure_streamlit()
    # Empty string when unset, which the UI treats as "no key provided".
    self.default_api_key = os.getenv("GROQ_API_KEY", "")
    self.initialize_session_state()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def configure_streamlit(self):
    """Configure the Streamlit page chrome.

    `st.set_page_config` must be the first Streamlit call of the script
    run, so this runs before any other UI code.
    """
    app_title = "LangChain: Process Content from Multiple Sources"
    st.set_page_config(page_title=app_title, page_icon="🦜")
    st.title(f"🦜 {app_title}")
|
| 26 |
|
| 27 |
+
def initialize_session_state(self):
    """Seed per-session defaults without clobbering values on rerun.

    Streamlit re-executes the script on every interaction, so each key is
    written only when it is not already present in `st.session_state`.
    """
    defaults = {
        "action_count": 0,   # free actions consumed so far
        "docs": None,        # processed documents, set by process_content
        "retriever": None,   # FAISS retriever for interactive Q&A
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
|
| 34 |
+
|
| 35 |
def calculate_chunk_size(self, text_length: int, model_context_length: int) -> int:
    """Return a text-splitting chunk size for the given model context.

    Aims for roughly a third of the model's context window, clamped to
    the range [1000, model_context_length // 2]. NOTE(review):
    `text_length` is currently unused; it is kept to preserve the
    caller-facing signature.
    """
    preferred = model_context_length // 3
    ceiling = model_context_length // 2
    clamped = min(preferred, ceiling)
    return max(1000, clamped)
|
|
|
|
| 39 |
def get_configuration(self) -> Dict[str, Any]:
|
| 40 |
with st.sidebar:
|
| 41 |
st.header("Configuration")
|
| 42 |
+
if st.session_state.action_count >= 3:
|
| 43 |
+
groq_api_key = st.text_input("Groq API Key", type="password")
|
| 44 |
+
else:
|
| 45 |
+
groq_api_key = self.default_api_key
|
| 46 |
+
st.info(f"Using default API key. {3 - st.session_state.action_count} free actions remaining.")
|
| 47 |
model = st.selectbox("Select Model", ["llama3-8b-8192", "gemma2-9b-it", "mixtral-8x7b-32768"])
|
| 48 |
|
| 49 |
st.header("Task")
|
|
|
|
| 80 |
os.unlink(temp_file_path)
|
| 81 |
return pdf_pages
|
| 82 |
|
| 83 |
+
|
| 84 |
def process_content(self, sources: Dict[str, Any]) -> List[Document]:
|
| 85 |
all_docs = []
|
| 86 |
|
|
|
|
| 122 |
os.unlink(temp_file_path)
|
| 123 |
|
| 124 |
if 'text' in sources and sources['text']:
|
| 125 |
+
with tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".txt", encoding="utf-8") as temp_file:
|
| 126 |
temp_file.write(sources['text'])
|
| 127 |
temp_file_path = temp_file.name
|
| 128 |
|
|
|
|
| 175 |
refine_prompt=prompts["refine_prompt"]
|
| 176 |
)
|
| 177 |
|
| 178 |
+
result = chain.run(input_documents=split_docs, action=action.lower())
|
| 179 |
+
st.session_state.action_count += 1
|
| 180 |
+
return result
|
| 181 |
|
| 182 |
def create_retriever(self, docs: List[Document]) -> FAISS:
|
| 183 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
|
|
|
| 202 |
action = "Answer questions about the content"
|
| 203 |
|
| 204 |
process_button = st.button("Process Content")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
if process_button:
|
| 207 |
+
if st.session_state.action_count >= 3:
|
| 208 |
+
self.default_api_key = ""
|
| 209 |
+
st.error("You have used all free actions. Please provide your Groq API Key in the sidebar.")
|
| 210 |
+
elif not config['groq_api_key'].strip():
|
| 211 |
st.error("Please provide your Groq API Key in the sidebar.")
|
| 212 |
elif not sources:
|
| 213 |
st.error("Please select at least one source type and provide content.")
|
|
|
|
| 227 |
else: # Interactive Q&A
|
| 228 |
st.session_state.retriever = self.create_retriever(st.session_state.docs)
|
| 229 |
st.success("Document processed and ready for questions!")
|
| 230 |
+
st.session_state.action_count += 1
|
| 231 |
|
| 232 |
if config['task'] == "Interactive Q&A" and st.session_state.retriever is not None:
|
| 233 |
question = st.text_input("Ask a question about the document:")
|