MusaR committed on
Commit
9b86fc8
·
verified ·
1 Parent(s): e086a87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py CHANGED
@@ -5,6 +5,8 @@ print("--- Python script starting ---")
5
  import streamlit as st
6
  import os
7
 
 
 
8
  os.environ['TOKENIZERS_PARALLELISM'] = 'false'
9
  os.environ['HF_HOME'] = '/app/huggingface_cache' # For transformers and datasets
10
  os.environ['TRANSFORMERS_CACHE'] = '/app/huggingface_cache/transformers'
@@ -109,6 +111,47 @@ try:
109
  input_variables=["context", "question"],
110
  partial_variables={"format_instructions": format_instructions}
111
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  rag_chain = (
113
  {"context": retriever, "question": RunnablePassthrough()}
114
  | prompt
 
5
  import streamlit as st
6
  import os
7
 
8
+ import langchain
9
+ langchain.debug = True
10
  os.environ['TOKENIZERS_PARALLELISM'] = 'false'
11
  os.environ['HF_HOME'] = '/app/huggingface_cache' # For transformers and datasets
12
  os.environ['TRANSFORMERS_CACHE'] = '/app/huggingface_cache/transformers'
 
111
  input_variables=["context", "question"],
112
  partial_variables={"format_instructions": format_instructions}
113
  )
114
+
115
+ # --- NEW: Break down the chain for debugging ---
116
def retrieve_and_rerank(input_dict):
    """Fetch documents for the incoming question and log what came back.

    Expects ``input_dict`` to carry the user query under ``"question"``.
    Returns a dict with the retrieved documents under ``"context"`` and the
    unchanged question under ``"question"``, ready for prompt formatting.
    """
    question = input_dict['question']
    print(f"--- RAG DEBUG: Retrieving for question: {question} ---")
    # `retriever` is the module-level retriever built earlier in app.py;
    # presumably it reranks internally — the log message says so. TODO confirm.
    retrieved = retriever.invoke(question)
    print(f"--- RAG DEBUG: Retrieved {len(retrieved)} docs after reranking ---")
    for idx, document in enumerate(retrieved):
        src = document.metadata.get('source', 'N/A')
        pg = document.metadata.get('page', 'N/A')
        # Only the first 100 chars of each doc, to keep the log readable.
        print(f" Doc {idx} (source: {src}, page: {pg}): {document.page_content[:100]}...")
    return {"context": retrieved, "question": question}
123
+
124
def format_prompt(input_dict):
    """Join the retrieved docs into one context string and fill the prompt.

    Expects ``input_dict`` with ``"context"`` (a list of documents) and
    ``"question"``; returns the invoked prompt template's output.
    """
    print(f"--- RAG DEBUG: Formatting prompt with context ---")
    # Manually construct the context string to see it clearly
    separator = "\n\n---\n\n"
    context_str = separator.join(doc.page_content for doc in input_dict['context'])
    # Log only a 500-char prefix so huge contexts don't flood the console.
    print(f"--- RAG DEBUG: Context fed to LLM: {context_str[:500]}... ---")
    payload = {"context": context_str, "question": input_dict['question']}
    return prompt.invoke(payload)
130
+
131
def call_llm(formatted_prompt):
    """Invoke the module-level LLM with an already-formatted prompt.

    Logs the raw model output verbatim (to see exactly what the provider
    returns) and passes it through unchanged.
    """
    print(f"--- RAG DEBUG: Calling LLM ---")
    response = llm.invoke(formatted_prompt)
    print(f"--- RAG DEBUG: Raw LLM Output: {response} ---")
    return response
136
+
137
def parse_output(llm_output_str):
    """Parse the raw LLM output with the module-level Pydantic parser.

    On success, returns the parsed object from ``pydantic_parser.invoke``.
    On any parsing failure, logs the offending raw output plus the full
    traceback and returns a fallback ``StructuredAnswer`` so the chain keeps
    running instead of crashing (deliberate best-effort behavior).

    Fixes: the original bound the exception to an unused ``e_parse`` local
    and put ``f"`` prefixes on placeholder-free strings.
    """
    print("--- RAG DEBUG: Attempting to parse LLM output with Pydantic ---")
    try:
        parsed = pydantic_parser.invoke(llm_output_str)
    except Exception:  # broad on purpose: any parse failure falls back below
        print("!!!!!!!!!! PYDANTIC PARSING ERROR !!!!!!!!!!")
        print(f"Raw LLM Output that failed to parse: {llm_output_str}")
        print(traceback.format_exc())
        # Fallback: return a dictionary indicating failure, or just the raw string
        return StructuredAnswer(summary="LLM output parsing failed. See logs.", key_points=[], confidence_score=0.0)
    print(f"--- RAG DEBUG: Pydantic parsing successful ---")
    return parsed
149
+
150
+
151
+
152
+
153
+
154
+
155
  rag_chain = (
156
  {"context": retriever, "question": RunnablePassthrough()}
157
  | prompt