Update app.py

app.py CHANGED
@@ -67,40 +67,34 @@ def create_db(splits):
     return vectordb
 
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
-    [old lines 70-81: previous LLM setup; their text was not preserved in this view]
-        temperature = temperature,
-        max_new_tokens = max_tokens,
-        top_k = top_k,
-    )
-
+    """Initialize the LLM chain with correct parameter names"""
+    llm = HuggingFaceEndpoint(
+        endpoint_url="https://api-inference.huggingface.co/models/" + llm_model,
+        task="text-generation",
+        model_kwargs={
+            "temperature": float(temperature),
+            "max_length": int(max_tokens),
+            "top_k": int(top_k)
+        },
+        huggingfacehub_api_token=api_token
+    )
+
     memory = ConversationBufferMemory(
         memory_key="chat_history",
         output_key='answer',
         return_messages=True
     )
 
-    retriever=vector_db.as_retriever()
+    retriever = vector_db.as_retriever()
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm,
         retriever=retriever,
-        chain_type="stuff",
+        chain_type="stuff",
         memory=memory,
         return_source_documents=True,
         verbose=False,
     )
-    return qa_chain
-
+    return qa_chain
 
 def format_chat_history(message, chat_history):
     """Format chat history for the LLM"""
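For context, a minimal sketch of how the rewritten chain could be exercised outside Flask. Nothing below is part of the commit: it assumes LLM_MODELS maps short names to Hugging Face repo ids, api_token is set, and vector_db was already built by create_db, all of which live elsewhere in app.py.

# Sketch only: drive the new chain directly (assumes LLM_MODELS, api_token,
# and an existing vector_db from create_db, defined elsewhere in app.py).
qa_chain = initialize_llmchain(
    llm_model=LLM_MODELS['llama'],
    temperature=0.5,
    max_tokens=4096,
    top_k=3,
    vector_db=vector_db,
)
# With ConversationBufferMemory attached, only the question is passed in;
# the memory object supplies and updates chat_history between calls.
result = qa_chain.invoke({"question": "What is this document about?"})
print(result["answer"])                  # generated answer
print(len(result["source_documents"]))   # chunks the retriever pulled in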
@@ -154,31 +148,27 @@ def init_llm():
     if vector_db is None:
         return jsonify({'error': 'Please upload PDFs first'}), 400
 
-    # Get parameters from the incoming request
     data = request.json
-    model_name = data.get('model', 'llama') # Default to 'llama'
-    temperature = data.get('temperature', 0.5)
-    max_tokens = data.get('max_tokens', 4096)
-    top_k = data.get('top_k', 3)
+    model_name = data.get('model', 'llama') # Default to 'llama'
+    temperature = float(data.get('temperature', 0.5))
+    max_tokens = int(data.get('max_tokens', 4096))
+    top_k = int(data.get('top_k', 3))
 
-    # Ensure the model name is valid
     if model_name not in LLM_MODELS:
         return jsonify({'error': 'Invalid model name'}), 400
 
     try:
-        # Initialize the LLM chain with the specified parameters and the vector_db
         qa_chain = initialize_llmchain(
             llm_model=LLM_MODELS[model_name],
             temperature=temperature,
             max_tokens=max_tokens,
             top_k=top_k,
-            vector_db=vector_db
+            vector_db=vector_db
         )
         return jsonify({'message': 'LLM initialized successfully'}), 200
     except Exception as e:
         return jsonify({'error': str(e)}), 500
 
-
 @app.route('/chat', methods=['POST'])
 def chat():
     """Handle chat interactions"""
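JSON values posted from a browser form often arrive as strings, which is why the handler now coerces them explicitly. A sketch of a matching client call, assuming the handler is registered under /init_llm (the route decorator sits outside this hunk):

import requests

# Sketch only: the /init_llm path is an assumption; only the handler
# name init_llm is visible in this diff.
resp = requests.post(
    "http://localhost:5000/init_llm",
    json={
        "model": "llama",       # must be a key of LLM_MODELS
        "temperature": "0.7",   # deliberately a string: the handler casts to float
        "max_tokens": 2048,
        "top_k": 3,
    },
)
print(resp.status_code, resp.json())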
@@ -275,7 +265,7 @@ def finish_upload():
 
     if not current_upload['filename']:
         return jsonify({'error': 'No upload in progress'}), 400
-
+
     try:
         # Create temp directory if it doesn't exist
         os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)