msmaje committed on
Commit 9ce27e5 · verified · 1 Parent(s): 10baa36

Update app.py

Files changed (1)
  1. app.py +226 -59
app.py CHANGED
@@ -17,16 +17,31 @@ try:
     from langchain_community.vectorstores import FAISS
     from langchain.prompts import PromptTemplate
     from langchain.chains import RetrievalQA
-    from langchain_huggingface import HuggingFaceEndpoint  # Updated import
+
+    # Updated imports for LLM - try multiple import paths
+    try:
+        from langchain_community.llms import HuggingFaceHub
+        HUGGINGFACE_HUB_AVAILABLE = True
+        logger.info("Using HuggingFaceHub from langchain_community")
+    except ImportError:
+        try:
+            from langchain.llms import HuggingFaceHub
+            HUGGINGFACE_HUB_AVAILABLE = True
+            logger.info("Using HuggingFaceHub from langchain.llms")
+        except ImportError:
+            try:
+                from langchain_huggingface import HuggingFaceEndpoint
+                HUGGINGFACE_HUB_AVAILABLE = False
+                logger.info("Using HuggingFaceEndpoint as fallback")
+            except ImportError:
+                logger.error("No suitable HuggingFace LLM implementation found")
+                HUGGINGFACE_HUB_AVAILABLE = False
+
     LANGCHAIN_AVAILABLE = True
 except ImportError as e:
-    try:
-        # Fallback to older import structure
-        from langchain_community.llms import HuggingFaceEndpoint
-        LANGCHAIN_AVAILABLE = True
-    except ImportError as e2:
-        logger.error(f"LangChain import error: {e}, {e2}")
-        LANGCHAIN_AVAILABLE = False
+    logger.error(f"LangChain import error: {e}")
+    LANGCHAIN_AVAILABLE = False
+    HUGGINGFACE_HUB_AVAILABLE = False
 
 # Create PDFs folder if it doesn't exist
 PDF_FOLDER_PATH = "./pdfs"
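Note: the nested try/except added above simply probes the candidate import paths in priority order and records which wrapper was found, so that create_llm() can branch on HUGGINGFACE_HUB_AVAILABLE later. A minimal sketch of the same idea, factored into a helper (pick_hf_llm_class is hypothetical, not part of app.py):

    def pick_hf_llm_class():
        """Return (llm_class, uses_hub) for the first importable wrapper, else (None, False)."""
        try:
            from langchain_community.llms import HuggingFaceHub  # older wrapper
            return HuggingFaceHub, True
        except ImportError:
            pass
        try:
            from langchain_huggingface import HuggingFaceEndpoint  # newer wrapper
            return HuggingFaceEndpoint, False
        except ImportError:
            return None, False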
@@ -63,57 +78,127 @@ def initialize_models():
         return False, f"❌ Error initializing models: {str(e)}"
 
 def create_llm():
-    """Create and return the LLM instance with improved error handling"""
+    """Create and return the LLM instance with proper Runnable interface"""
     hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+    if not hf_token:
+        logger.error("HuggingFace API token not found")
+        return create_fallback_llm()
+
     try:
-        # Try different model configurations
-        models_to_try = [
-            "microsoft/DialoGPT-medium",
-            "google/flan-t5-base",
-            "microsoft/DialoGPT-small",
-            "tiiuae/falcon-7b-instruct"
-        ]
-
-        for model_id in models_to_try:
-            try:
-                llm = HuggingFaceEndpoint(
-                    repo_id=model_id,
-                    temperature=0.7,
-                    max_new_tokens=512,
-                    huggingfacehub_api_token=hf_token,
-                    model_kwargs={
-                        "max_length": 512,
-                        "do_sample": True,
-                        "temperature": 0.7,
-                        "top_p": 0.9,
-                        "top_k": 50
-                    }
-                )
-                logger.info(f"Successfully initialized LLM with model: {model_id}")
-                return llm
-            except Exception as model_error:
-                logger.warning(f"Failed to initialize {model_id}: {model_error}")
-                continue
+        # Prioritize HuggingFaceHub as it's more stable with LangChain
+        if HUGGINGFACE_HUB_AVAILABLE:
+            models_to_try = [
+                "mistralai/Mistral-7B-Instruct-v0.2",
+                "microsoft/DialoGPT-medium",
+                "google/flan-t5-base",
+                "microsoft/DialoGPT-small",
+                "tiiuae/falcon-7b-instruct"
+            ]
+
+            for model_id in models_to_try:
+                try:
+                    llm = HuggingFaceHub(
+                        repo_id=model_id,
+                        huggingfacehub_api_token=hf_token,
+                        model_kwargs={
+                            "temperature": 0.7,
+                            "max_new_tokens": 512,
+                            "max_length": 512,
+                            "do_sample": True,
+                            "top_p": 0.9,
+                            "top_k": 50
+                        }
+                    )
+                    logger.info(f"Successfully initialized HuggingFaceHub with model: {model_id}")
+                    return llm
+                except Exception as model_error:
+                    logger.warning(f"Failed to initialize {model_id} with HuggingFaceHub: {model_error}")
+                    continue
 
-        # If all models fail, raise the last error
+        # Fallback to HuggingFaceEndpoint if HuggingFaceHub is not available
+        try:
+            from langchain_huggingface import HuggingFaceEndpoint
+
+            models_to_try = [
+                "mistralai/Mistral-7B-Instruct-v0.2",
+                "microsoft/DialoGPT-medium",
+                "google/flan-t5-base"
+            ]
+
+            for model_id in models_to_try:
+                try:
+                    llm = HuggingFaceEndpoint(
+                        repo_id=model_id,
+                        temperature=0.7,
+                        max_new_tokens=512,
+                        huggingfacehub_api_token=hf_token,
+                        model_kwargs={
+                            "max_length": 512,
+                            "do_sample": True,
+                            "temperature": 0.7,
+                            "top_p": 0.9,
+                            "top_k": 50
+                        }
+                    )
+                    logger.info(f"Successfully initialized HuggingFaceEndpoint with model: {model_id}")
+                    return llm
+                except Exception as model_error:
+                    logger.warning(f"Failed to initialize {model_id} with HuggingFaceEndpoint: {model_error}")
+                    continue
+        except ImportError:
+            pass
+
+        # If all else fails, return fallback
         raise Exception("All model initialization attempts failed")
 
     except Exception as e:
         logger.error(f"LLM creation error: {e}")
-        # Return a simple fallback that doesn't use HuggingFace API
         return create_fallback_llm()
 
 def create_fallback_llm():
-    """Create a simple fallback LLM for basic responses"""
-    class FallbackLLM:
-        def __call__(self, prompt):
-            return "I apologize, but I'm experiencing technical difficulties with the language model. Please try again later or contact support."
-
-        def invoke(self, prompt):
-            return self.__call__(prompt)
-
-    return FallbackLLM()
+    """Create a proper LangChain-compatible fallback LLM"""
+    try:
+        from langchain.llms.base import LLM
+        from langchain.callbacks.manager import CallbackManagerForLLMRun
+        from typing import Optional, List, Any
+
+        class FallbackLLM(LLM):
+            """A simple fallback LLM that provides basic responses"""
+
+            @property
+            def _llm_type(self) -> str:
+                return "fallback"
+
+            def _call(
+                self,
+                prompt: str,
+                stop: Optional[List[str]] = None,
+                run_manager: Optional[CallbackManagerForLLMRun] = None,
+                **kwargs: Any,
+            ) -> str:
+                """Basic response generation"""
+                if "summarize" in prompt.lower():
+                    return "I apologize, but I'm currently experiencing technical difficulties with the AI model. However, I can see that you're asking about content in your documents. Please try rephrasing your question or check if the model service is available."
+                elif "what" in prompt.lower() or "how" in prompt.lower():
+                    return "I'm having trouble processing your question due to technical issues with the language model. The document search is working, but I cannot generate detailed responses right now. Please try again later."
+                else:
+                    return "I apologize, but I'm experiencing technical difficulties with the language model. The document processing is working correctly, but response generation is currently unavailable. Please try again later or contact support."
+
+        return FallbackLLM()
+
+    except ImportError:
+        # If we can't even import the base LLM class, create a simple mock
+        logger.error("Cannot create proper fallback LLM - LangChain base classes not available")
+
+        class SimpleFallback:
+            def invoke(self, prompt):
+                return "System temporarily unavailable. Please try again later."
+
+            def __call__(self, prompt):
+                return self.invoke(prompt)
+
+        return SimpleFallback()
 
 def load_preloaded_pdfs(chunk_size=1000, chunk_overlap=200):
     """Load PDFs from the pre-existing folder"""
@@ -169,7 +254,7 @@ Helpful Answer:
     # Initialize LLM using the updated function
     llm = create_llm()
 
-    # Create RetrievalQA chain with error handling
+    # Create RetrievalQA chain with better error handling
     try:
         retrieval_qa = RetrievalQA.from_chain_type(
             llm=llm,
@@ -178,6 +263,15 @@ Helpful Answer:
             return_source_documents=True,
             chain_type_kwargs={"prompt": prompt}
         )
+
+        # Test the chain with a simple query to ensure it works
+        try:
+            test_result = retrieval_qa({"query": "test"})
+            logger.info("QA chain test successful")
+        except Exception as test_error:
+            logger.warning(f"QA chain test failed: {test_error}")
+            # Chain created but might have issues - continue anyway
+
     except Exception as chain_error:
         logger.error(f"Chain creation error: {chain_error}")
         return f"❌ Error creating QA chain: {str(chain_error)}"
@@ -292,7 +386,7 @@ Helpful Answer:
     # Initialize LLM using the updated function
     llm = create_llm()
 
-    # Create RetrievalQA chain with error handling
+    # Create RetrievalQA chain with better error handling
     try:
         retrieval_qa = RetrievalQA.from_chain_type(
             llm=llm,
@@ -301,6 +395,14 @@ Helpful Answer:
             return_source_documents=True,
             chain_type_kwargs={"prompt": prompt}
         )
+
+        # Test the chain
+        try:
+            test_result = retrieval_qa({"query": "test"})
+            logger.info("QA chain test successful")
+        except Exception as test_error:
+            logger.warning(f"QA chain test failed: {test_error}")
+
     except Exception as chain_error:
         logger.error(f"Chain creation error: {chain_error}")
         return f"❌ Error creating QA chain: {str(chain_error)}"
@@ -720,22 +822,87 @@ def create_interface():
         gr.Markdown("""
         ---
         <div style="text-align: center; color: #666; font-size: 0.9em;">
-        💡 <strong>Tip:</strong> For best results, ask specific questions about your documents
+        💡 <strong>Tips:</strong> Upload multiple PDFs • Use specific questions • Check sources for accuracy<br>
+        🔧 <strong>Powered by:</strong> LangChain • HuggingFace • FAISS • Gradio
         </div>
         """)
 
     return demo
 
+# Check if environment is properly configured
+def check_environment():
+    """Check if the environment is properly configured"""
+    issues = []
+
+    if not LANGCHAIN_AVAILABLE:
+        issues.append("❌ LangChain not available - please install: pip install langchain langchain-community")
+
+    if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
+        issues.append("❌ HuggingFace API token not found - set HUGGINGFACEHUB_API_TOKEN environment variable")
+
+    return issues
+
+# Main execution
 if __name__ == "__main__":
-    # Check if running on HuggingFace Spaces
-    if os.getenv("SPACE_ID"):
+    print("🚀 Starting PDF RAG System...")
+
+    # Check environment
+    env_issues = check_environment()
+    if env_issues:
+        print("\n⚠️ Environment Issues Detected:")
+        for issue in env_issues:
+            print(f" {issue}")
+        print("\n📋 Setup Instructions:")
+        print(" 1. Install dependencies: pip install langchain langchain-community sentence-transformers faiss-cpu PyPDF2 gradio")
+        print(" 2. Get HuggingFace token: https://huggingface.co/settings/tokens")
+        print(" 3. Set environment variable: export HUGGINGFACEHUB_API_TOKEN=your_token_here")
+        print(" 4. Restart the application")
+        print("\n🔄 Continuing with limited functionality...\n")
+
+    # Initialize models on startup
+    if LANGCHAIN_AVAILABLE:
+        print("🔧 Initializing models...")
+        success, message = initialize_models()
+        print(f" {message}")
+
+    # Check for pre-loaded PDFs
+    if PRELOADED_PDFS:
+        pdf_count = len([f for f in os.listdir(PDF_FOLDER_PATH) if f.endswith('.pdf')])
+        print(f"📚 Found {pdf_count} pre-loaded PDF files in ./pdfs/")
+
+    try:
+        # Create and launch the interface
         demo = create_interface()
+
+        print("🌐 Launching web interface...")
+        print(" 📱 Mobile-optimized interface")
+        print(" 🖥️ Desktop and tablet supported")
+        print(" 🔗 Access the app in your browser")
+
+        # Launch with configuration for different environments
         demo.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            share=False
+            share=False,            # Set to True if you want a public shareable link
+            server_name="0.0.0.0",  # Allow external access
+            server_port=7860,       # Default Gradio port
+            inbrowser=True,         # Auto-open browser
+            show_error=True,        # Show detailed errors
+            quiet=False             # Show startup logs
         )
-    else:
-        # Local development
-        demo = create_interface()
-        demo.launch(share=True)
+
+    except Exception as e:
+        print(f"❌ Failed to launch interface: {e}")
+        print("🔧 Try these troubleshooting steps:")
+        print(" 1. Check if port 7860 is available")
+        print(" 2. Install Gradio: pip install gradio")
+        print(" 3. Check firewall settings")
+        print(" 4. Try running with: python app.py")
+
+    except KeyboardInterrupt:
+        print("\n👋 Shutting down PDF RAG System...")
+        print(" Thank you for using the application!")
+
+    finally:
+        # Cleanup
+        if 'vectorstore' in globals() and vectorstore is not None:
+            print("🧹 Cleaning up resources...")
+        print("✅ Shutdown complete.")
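Note: this change drops the old SPACE_ID branch and always launches with a single configuration. If the Spaces-versus-local distinction is still wanted, a minimal sketch that keeps the new options but restores the check (SPACE_ID is the environment variable Hugging Face Spaces sets; demo is the Blocks object returned by create_interface()):

    launch_kwargs = dict(server_name="0.0.0.0", server_port=7860, show_error=True)
    if os.getenv("SPACE_ID"):                 # running on Hugging Face Spaces
        launch_kwargs["share"] = False        # Spaces already serves a public URL
    else:                                     # local development
        launch_kwargs.update(share=True, inbrowser=True)
    demo.launch(**launch_kwargs)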
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 