Spaces:

kamkol
/

AB_Testing_RAG_Agent

Sleeping

App Files Community

kamkol committed on May 1, 2025

Commit

25ae1f7

1 Parent(s): 06d2b05

Simplify OpenAI initialization for Hugging Face compatibility

Browse files

Files changed (1) hide show

streamlit_app.py +78 -235

streamlit_app.py CHANGED Viewed

@@ -203,266 +203,109 @@ def load_document_chunks():
 def get_chat_model():
     """Get the chat model for initial RAG."""
     print("Initializing chat model...")
-    # Try multiple approaches to initialize the model
     try:
-        # Approach 1: Direct OpenAI client
-        print("Trying direct OpenAI client approach")
-        try:
-            # Use direct OpenAI client to avoid proxy issues
-            openai_client = OpenAI()
-            # Create a wrapper that mimics LangChain's interface
-            class SimpleOpenAIWrapper:
-                def invoke(self, messages):
-                    print("Invoking SimpleOpenAIWrapper...")
-                    # Convert LangChain messages to OpenAI format
-                    openai_messages = []
-                    for msg in messages:
-                        role = "user"
-                        if hasattr(msg, "type"):
-                            role = "assistant" if msg.type == "ai" else "user"
-                        openai_messages.append({
-                            "role": role,
-                            "content": msg.content
-                        })
-                    # Log what we're sending to OpenAI
-                    print(f"Sending {len(openai_messages)} messages to OpenAI API")
-                    # Call API directly
                     response = openai_client.chat.completions.create(
-                        model="gpt-4.1-mini",
-                        messages=openai_messages,
-                        temperature=0
                     )
-                    # Create response object with content attribute
                     class SimpleResponse:
                         def __init__(self, content):
                             self.content = content
                     result = SimpleResponse(response.choices[0].message.content)
-                    print(f"Got response from OpenAI (length: {len(result.content)})")
                     return result
-            print("Successfully created SimpleOpenAIWrapper")
-            return SimpleOpenAIWrapper()
-        except Exception as e:
-            print(f"Direct OpenAI client approach failed: {str(e)}")
-            import traceback
-            traceback.print_exc()
-            raise
-    except Exception as outer_e:
-        print(f"First approach failed: {str(outer_e)}")
-        # Approach 2: Standard LangChain
-        try:
-            print("Trying standard LangChain approach")
-            model = ChatOpenAI(model="gpt-4.1-mini", temperature=0)
-            print("Successfully created ChatOpenAI model")
-            return model
-        except Exception as e:
-            print(f"Standard LangChain approach failed: {str(e)}")
-            # Approach 3: Very minimal LangChain
-            try:
-                print("Trying minimal LangChain approach")
-                model = ChatOpenAI(model="gpt-3.5-turbo")
-                print("Successfully created minimal ChatOpenAI model")
-                return model
-            except Exception as e2:
-                print(f"Minimal LangChain also failed: {str(e2)}")
-                # Last resort: Dummy implementation
-                print("Using dummy model as last resort")
-                class DummyModel:
-                    def invoke(self, messages):
-                        print("WARNING: Using dummy model that returns fixed responses")
-                        class DummyResponse:
-                            def __init__(self):
-                                self.content = "I apologize, but I'm unable to process your query right now due to a technical issue. The system administrators have been notified."
-                        return DummyResponse()
-                return DummyModel()
 @st.cache_resource
 def get_agent_model():
     """Get the more powerful model for agent and evaluation."""
     print("Initializing agent model...")
-    # Try multiple approaches to initialize the model
-    try:
-        # Approach 1: Direct OpenAI client
-        print("Trying direct OpenAI client approach for agent model")
-        try:
-            # Use direct OpenAI client to avoid proxy issues
-            openai_client = OpenAI()
-            # Create a wrapper that mimics LangChain's interface
-            class SimpleOpenAIWrapper:
-                def invoke(self, messages):
-                    print("Invoking agent SimpleOpenAIWrapper...")
-                    # Convert LangChain messages to OpenAI format
-                    openai_messages = []
-                    for msg in messages:
-                        role = "user"
-                        if hasattr(msg, "type"):
-                            role = "assistant" if msg.type == "ai" else "user"
-                        openai_messages.append({
-                            "role": role,
-                            "content": msg.content
-                        })
-                    # Log what we're sending to OpenAI
-                    print(f"Sending {len(openai_messages)} messages to OpenAI API (agent)")
-                    # Call API directly with a more powerful model
-                    response = openai_client.chat.completions.create(
-                        model="gpt-4.1",
-                        messages=openai_messages,
-                        temperature=0
-                    )
-                    class SimpleResponse:
-                        def __init__(self, content):
-                            self.content = content
-                    result = SimpleResponse(response.choices[0].message.content)
-                    print(f"Got agent response from OpenAI (length: {len(result.content)})")
-                    return result
-            print("Successfully created agent SimpleOpenAIWrapper")
-            return SimpleOpenAIWrapper()
-        except Exception as e:
-            print(f"Direct OpenAI client approach for agent failed: {str(e)}")
-            import traceback
-            traceback.print_exc()
-            raise
-    except Exception as outer_e:
-        print(f"First agent approach failed: {str(outer_e)}")
-        # Approach 2: Standard LangChain
-        try:
-            print("Trying standard LangChain approach for agent")
-            model = ChatOpenAI(model="gpt-4.1", temperature=0)
-            print("Successfully created agent ChatOpenAI model")
-            return model
-        except Exception as e:
-            print(f"Standard LangChain approach for agent failed: {str(e)}")
-            # Approach 3: Very minimal LangChain with fallback model
-            try:
-                print("Trying minimal LangChain approach for agent")
-                model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
-                print("Successfully created minimal agent ChatOpenAI model")
-                return model
-            except Exception as e2:
-                print(f"Minimal LangChain for agent also failed: {str(e2)}")
-                # Last resort: Dummy implementation
-                print("Using dummy agent model as last resort")
-                class DummyModel:
-                    def invoke(self, messages):
-                        print("WARNING: Using dummy agent model that returns fixed responses")
-                        class DummyResponse:
-                            def __init__(self):
-                                self.content = "I apologize, but I'm unable to process complex queries right now due to a technical issue."
-                        return DummyResponse()
-                return DummyModel()
 @st.cache_resource
 def get_embedding_model():
     """Get the embedding model."""
     print("Initializing embedding model...")
     try:
-        # Approach 1: Direct OpenAI client
-        print("Trying direct OpenAI client approach for embeddings")
-        try:
-            # Create an OpenAI client directly
-            openai_client = OpenAI()
-            # Create a wrapper class that matches the interface LangChain expects
-            class SimpleEmbeddings:
-                def embed_query(self, text):
-                    print(f"Embedding query text (length: {len(text)})")
-                    try:
-                        response = openai_client.embeddings.create(
-                            model="text-embedding-3-small",
-                            input=text
-                        )
-                        print("Successfully got embedding from OpenAI API")
-                        return response.data[0].embedding
-                    except Exception as e:
-                        print(f"Error in embed_query: {str(e)}")
-                        import traceback
-                        traceback.print_exc()
-                        # Return a dummy embedding of the right size
-                        print("WARNING: Returning dummy embedding vector")
-                        return [0.0] * 1536  # Standard size for embeddings
-                def embed_documents(self, texts):
-                    print(f"Embedding {len(texts)} documents")
-                    try:
-                        if not texts:
-                            return []
-                        # Embed each text individually to avoid batch size issues
-                        results = []
-                        for i, text in enumerate(texts):
-                            print(f"Embedding document {i+1}/{len(texts)}")
-                            results.append(self.embed_query(text))
-                        return results
-                    except Exception as e:
-                        print(f"Error in embed_documents: {str(e)}")
-                        import traceback
-                        traceback.print_exc()
-                        # Return dummy embeddings
-                        print("WARNING: Returning dummy document embeddings")
-                        return [[0.0] * 1536 for _ in range(len(texts))]
-            print("Successfully created SimpleEmbeddings")
-            return SimpleEmbeddings()
-        except Exception as e:
-            print(f"Direct OpenAI client approach for embeddings failed: {str(e)}")
-            import traceback
-            traceback.print_exc()
-            raise
-    except Exception as outer_e:
-        print(f"First embedding approach failed: {str(outer_e)}")
-        # Approach 2: Standard LangChain OpenAIEmbeddings
-        try:
-            print("Trying standard LangChain approach for embeddings")
-            embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
-            print("Successfully created OpenAIEmbeddings")
-            return embeddings
-        except Exception as e:
-            print(f"Standard OpenAIEmbeddings failed: {str(e)}")
-            # Approach 3: Very minimal OpenAIEmbeddings
-            try:
-                print("Trying minimal OpenAIEmbeddings")
-                embeddings = OpenAIEmbeddings()
-                print("Successfully created minimal OpenAIEmbeddings")
-                return embeddings
-            except Exception as e2:
-                print(f"Minimal OpenAIEmbeddings failed: {str(e2)}")
-                # Last resort: Dummy implementation
-                print("Using dummy embeddings as last resort")
-                class DummyEmbeddings:
-                    def embed_query(self, text):
-                        print("WARNING: Using dummy embeddings")
-                        return [0.0] * 1536
-                    def embed_documents(self, texts):
-                        print("WARNING: Using dummy document embeddings")
-                        return [[0.0] * 1536 for _ in range(len(texts))]
-                return DummyEmbeddings()
 @st.cache_resource
 def setup_qdrant_client():

 def get_chat_model():
     """Get the chat model for initial RAG."""
     print("Initializing chat model...")
     try:
+        # Very minimal OpenAI initialization for Hugging Face compatibility
+        openai_api_key = os.environ.get("OPENAI_API_KEY", "")
+        openai_client = OpenAI(api_key=openai_api_key)
+        # Create a simplified wrapper that avoids any problematic parameters
+        class SimpleOpenAIWrapper:
+            def invoke(self, messages):
+                print("Invoking chat model...")
+                # Convert LangChain messages to OpenAI format
+                openai_messages = []
+                for msg in messages:
+                    role = "user"
+                    if hasattr(msg, "type"):
+                        role = "assistant" if msg.type == "ai" else "user"
+                    openai_messages.append({
+                        "role": role,
+                        "content": msg.content
+                    })
+                # Call API directly with absolutely minimal parameters
+                try:
                     response = openai_client.chat.completions.create(
+                        model="gpt-3.5-turbo", # Use a minimal, widely supported model
+                        messages=openai_messages
                     )
+                    # Create response object
                     class SimpleResponse:
                         def __init__(self, content):
                             self.content = content
                     result = SimpleResponse(response.choices[0].message.content)
+                    print(f"Got response of length: {len(result.content)}")
                     return result
+                except Exception as e:
+                    print(f"Error calling OpenAI API: {str(e)}")
+                    raise
+        return SimpleOpenAIWrapper()
+    except Exception as e:
+        print(f"Error initializing chat model: {str(e)}")
+        # Create dummy for testing
+        class DummyModel:
+            def invoke(self, messages):
+                print("WARNING: Using dummy model!")
+                return type('obj', (object,), {'content': 'I apologize, but I cannot access the necessary data to answer this question.'})
+        return DummyModel()
 @st.cache_resource
 def get_agent_model():
     """Get the more powerful model for agent and evaluation."""
     print("Initializing agent model...")
+    # Use the exact same approach as the chat model for consistency
+    return get_chat_model()
 @st.cache_resource
 def get_embedding_model():
     """Get the embedding model."""
     print("Initializing embedding model...")
     try:
+        # Very minimal OpenAI initialization for Hugging Face compatibility
+        openai_api_key = os.environ.get("OPENAI_API_KEY", "")
+        openai_client = OpenAI(api_key=openai_api_key)
+        # Create a wrapper that avoids any problematic parameters
+        class SimpleEmbeddings:
+            def embed_query(self, text):
+                print(f"Embedding query of length: {len(text)}")
+                try:
+                    response = openai_client.embeddings.create(
+                        model="text-embedding-ada-002", # Use older, more compatible model
+                        input=text
+                    )
+                    print("Successfully got embedding")
+                    return response.data[0].embedding
+                except Exception as e:
+                    print(f"Error in embed_query: {str(e)}")
+                    # Return a dummy embedding
+                    print("WARNING: Returning dummy embedding!")
+                    return [0.0] * 1536
+            def embed_documents(self, texts):
+                print(f"Embedding {len(texts)} documents")
+                results = []
+                for i, text in enumerate(texts):
+                    results.append(self.embed_query(text))
+                return results
+        return SimpleEmbeddings()
+    except Exception as e:
+        print(f"Error initializing embedding model: {str(e)}")
+        # Create dummy for testing
+        class DummyEmbeddings:
+            def embed_query(self, text):
+                print("WARNING: Using dummy embeddings!")
+                return [0.0] * 1536
+            def embed_documents(self, texts):
+                return [[0.0] * 1536 for _ in range(len(texts))]
+        return DummyEmbeddings()
 @st.cache_resource
 def setup_qdrant_client():