Prathamesh1420 committed on
Commit 23f378c · verified · 1 Parent(s): dc64232

Update app.py

Files changed (1)
  1. app.py +0 -359
app.py CHANGED
@@ -1,362 +1,3 @@
- '''
- ####
- import os
- import gradio as gr
- import requests
- from pinecone import Pinecone
- from langchain.prompts import PromptTemplate
- from langchain.chains.llm import LLMChain
- from langchain.llms.base import LLM
- from typing import Optional, List, Mapping, Any
- from langchain.embeddings import HuggingFaceEmbeddings
-
- # ----------- 1. Custom LLM to call your LitServe endpoint -----------
- class LitServeLLM(LLM):
-     endpoint_url: str
-
-     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-         payload = {"prompt": prompt}
-         response = requests.post(self.endpoint_url, json=payload)
-         if response.status_code == 200:
-             data = response.json()
-             return data.get("response", "").strip()
-         else:
-             raise ValueError(f"Request failed: {response.status_code} {response.text}")
-
-     @property
-     def _identifying_params(self) -> Mapping[str, Any]:
-         return {"endpoint_url": self.endpoint_url}
-
-     @property
-     def _llm_type(self) -> str:
-         return "litserve_llm"
-
-
- # ----------- 2. Connect to Pinecone -----------
- PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
- pc = Pinecone(api_key=PINECONE_API_KEY)
- index = pc.Index("rag-granite-index")
-
- # ----------- 3. Load embedding model -----------
- embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-
- # ----------- 4. Function to get top context from Pinecone -----------
- def get_retrieved_context(query: str, top_k=3):
-     query_embedding = embeddings_model.embed_query(query)
-     results = index.query(
-         namespace="rag-ns",
-         vector=query_embedding,
-         top_k=top_k,
-         include_metadata=True
-     )
-     context_parts = [match['metadata']['text'] for match in results['matches']]
-     return "\n".join(context_parts)
-
- # ----------- 5. Create LLMChain with your model -----------
- model = LitServeLLM(
-     endpoint_url="https://8001-01k2h9d9mervcmgfn66ybkpwvq.cloudspaces.litng.ai/predict"
- )
-
- prompt = PromptTemplate(
-     input_variables=["context", "question"],
-     template="""
- You are a smart assistant. Based on the provided context, answer the question in 1–2 lines only.
- If the context has more details, summarize it concisely.
-
- Context:
- {context}
-
- Question: {question}
-
- Answer:
- """
- )
-
- llm_chain = LLMChain(llm=model, prompt=prompt)
-
- # ----------- 6. Main RAG Function -----------
- def rag_pipeline(question):
-     try:
-         retrieved_context = get_retrieved_context(question)
-         response = llm_chain.invoke({
-             "context": retrieved_context,
-             "question": question
-         })["text"].strip()
-
-         # Only keep what's after "Answer:"
-         if "Answer:" in response:
-             response = response.split("Answer:", 1)[-1].strip()
-
-         return response
-     except Exception as e:
-         return f"Error: {str(e)}"
-
-
- # ----------- 7. Gradio UI -----------
- with gr.Blocks() as demo:
-     gr.Markdown("# 🧠 RAG Chatbot (Pinecone + LitServe)")
-     question_input = gr.Textbox(label="Ask your question here")
-     answer_output = gr.Textbox(label="Answer")
-     ask_button = gr.Button("Get Answer")
-     ask_button.click(rag_pipeline, inputs=question_input, outputs=answer_output)
-
- if __name__ == "__main__":
-     demo.launch()
- '''
-
-
-
- '''
- import os
- import gradio as gr
- import requests
- import mlflow
- import dagshub
- from pinecone import Pinecone
- from langchain.prompts import PromptTemplate
- from langchain.chains.llm import LLMChain
- from langchain.llms.base import LLM
- from typing import Optional, List, Mapping, Any
- import time
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from dotenv import load_dotenv
- from datetime import datetime
-
- # Load environment variables
- pinecone_api_key = os.environ["PINECONE_API_KEY"]
-
- mlflow_tracking_uri = os.environ["MLFLOW_TRACKING_URI"]
-
- # ----------- DagsHub & MLflow Setup -----------
-
- dagshub.init(
-     repo_owner='prathamesh.khade20',
-     repo_name='Maintenance_AI_website',
-     mlflow=True
- )
-
- mlflow.set_tracking_uri(mlflow_tracking_uri)
- mlflow.set_experiment("Maintenance-RAG-Chatbot")
- mlflow.langchain.autolog()
-
-
- # Initialize MLflow run for app configuration
- with mlflow.start_run(run_name=f"App-Config-{datetime.now().strftime('%Y%m%d-%H%M%S')}") as setup_run:
-     # Log environment configuration
-     mlflow.log_params({
-         "pinecone_index": "rag-granite-index",
-         "embedding_model": "all-MiniLM-L6-v2",
-         "namespace": "rag-ns",
-         "top_k": 3,
-         "llm_endpoint": "https://8001-01k2h9d9mervcmgfn66ybkpwvq.cloudspaces.litng.ai/predict"
-     })
-
-     # Log important files as artifacts
-     mlflow.log_text("""
- You are a smart assistant. Based on the provided context, answer the question in 1–2 lines only.
- If the context has more details, summarize it concisely.
- Context:
- {context}
- Question: {question}
- Answer:
- """, "artifacts/prompt_template.txt")
-
- # ----------- 1. Custom LLM for LitServe endpoint -----------
- class LitServeLLM(LLM):
-     endpoint_url: str
-
-     @mlflow.trace
-     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-         payload = {"prompt": prompt}
-
-         with mlflow.start_span("lit_serve_request"):
-             start_time = time.time()
-             response = requests.post(self.endpoint_url, json=payload)
-             latency = time.time() - start_time
-
-             mlflow.log_metric("lit_serve_latency", latency)
-
-             if response.status_code == 200:
-                 data = response.json()
-                 mlflow.log_metric("response_tokens", len(data.get("response", "").split()))
-                 return data.get("response", "").strip()
-             else:
-                 mlflow.log_metric("request_errors", 1)
-                 error_info = {
-                     "status_code": response.status_code,
-                     "error": response.text,
-                     "timestamp": datetime.now().isoformat()
-                 }
-                 mlflow.log_dict(error_info, "artifacts/error_log.json")
-                 raise ValueError(f"Request failed: {response.status_code}")
-
-     @property
-     def _identifying_params(self) -> Mapping[str, Any]:
-         return {"endpoint_url": self.endpoint_url}
-
-     @property
-     def _llm_type(self) -> str:
-         return "litserve_llm"
-
- # ----------- 2. Pinecone Connection -----------
- @mlflow.trace
- def init_pinecone():
-     PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
-     pc = Pinecone(api_key=PINECONE_API_KEY)
-     return pc.Index("rag-granite-index")
-
- index = init_pinecone()
-
- # ----------- 3. Embedding Model -----------
- embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-
- # ----------- 4. Context Retrieval with Tracing -----------
- @mlflow.trace
- def get_retrieved_context(query: str, top_k=3):
-     """Retrieve context from Pinecone with performance tracing"""
-     with mlflow.start_span("embedding_generation"):
-         start_time = time.time()
-         query_embedding = embeddings_model.embed_query(query)
-         mlflow.log_metric("embedding_latency", time.time() - start_time)
-
-     with mlflow.start_span("pinecone_query"):
-         start_time = time.time()
-         results = index.query(
-             namespace="rag-ns",
-             vector=query_embedding,
-             top_k=top_k,
-             include_metadata=True
-         )
-         mlflow.log_metric("pinecone_latency", time.time() - start_time)
-         mlflow.log_metric("retrieved_chunks", len(results['matches']))
-
-     context_parts = [match['metadata']['text'] for match in results['matches']]
-     return "\n".join(context_parts)
-
- # ----------- 5. LLM Chain Setup -----------
- model = LitServeLLM(
-     endpoint_url="https://8001-01k2h9d9mervcmgfn66ybkpwvq.cloudspaces.litng.ai/predict"
- )
-
- prompt = PromptTemplate(
-     input_variables=["context", "question"],
-     template="""
- You are a smart assistant. Based on the provided context, answer the question in 1–2 lines only.
- If the context has more details, summarize it concisely.
- Context:
- {context}
- Question: {question}
- Answer:
- """
- )
-
- llm_chain = LLMChain(llm=model, prompt=prompt)
-
- # ----------- 6. RAG Pipeline with Full Tracing -----------
- @mlflow.trace
- def rag_pipeline(question):
-     """End-to-end RAG pipeline with MLflow tracing"""
-     try:
-         # Start a new nested run for each query
-         with mlflow.start_run(run_name=f"Query-{datetime.now().strftime('%H%M%S')}", nested=True):
-             mlflow.log_param("user_question", question)
-
-             # Retrieve context
-             retrieved_context = get_retrieved_context(question)
-             mlflow.log_text(retrieved_context, "artifacts/retrieved_context.txt")
-
-             # Generate response
-             start_time = time.time()
-             response = llm_chain.invoke({
-                 "context": retrieved_context,
-                 "question": question
-             })["text"].strip()
-
-             # Clean response
-             if "Answer:" in response:
-                 response = response.split("Answer:", 1)[-1].strip()
-
-             # Log metrics
-             mlflow.log_metric("response_latency", time.time() - start_time)
-             mlflow.log_metric("response_length", len(response))
-             mlflow.log_text(response, "artifacts/response.txt")
-
-             return response
-
-     except Exception as e:
-         mlflow.log_metric("pipeline_errors", 1)
-         error_info = {
-             "error": str(e),
-             "question": question,
-             "timestamp": datetime.now().isoformat()
-         }
-         mlflow.log_dict(error_info, "artifacts/pipeline_errors.json")
-         return f"Error: {str(e)}"
-
- # ----------- 7. Gradio UI with Enhanced Tracking -----------
- with gr.Blocks() as demo:
-     gr.Markdown("# 🛠 Maintenance AI Assistant")
-
-     # Track additional UI metrics
-     usage_counter = gr.State(value=0)
-     session_start = gr.State(value=datetime.now().isoformat())
-
-     question_input = gr.Textbox(label="Ask your maintenance question")
-     answer_output = gr.Textbox(label="AI Response")
-     ask_button = gr.Button("Get Answer")
-     feedback = gr.Radio(["Helpful", "Not Helpful"], label="Was this response helpful?")
-
-     def track_usage(question, count, session_start, feedback=None):
-         """Wrapper to track usage metrics with feedback"""
-         count += 1
-
-         # Start tracking context
-         with mlflow.start_run(run_name=f"User-Interaction-{count}", nested=True):
-             mlflow.log_param("question", question)
-             mlflow.log_param("session_start", session_start)
-
-             # Get response
-             response = rag_pipeline(question)
-
-             # Log feedback if provided
-             if feedback:
-                 mlflow.log_param("user_feedback", feedback)
-                 mlflow.log_metric("helpful_responses", 1 if feedback == "Helpful" else 0)
-
-             # Update metrics
-             mlflow.log_metric("total_queries", count)
-
-             return response, count, session_start
-
-     ask_button.click(
-         track_usage,
-         inputs=[question_input, usage_counter, session_start],
-         outputs=[answer_output, usage_counter, session_start]
-     )
-
-     feedback.change(
-         track_usage,
-         inputs=[question_input, usage_counter, session_start, feedback],
-         outputs=[answer_output, usage_counter, session_start]
-     )
-
- if __name__ == "__main__":
-     # Log deployment information
-     with mlflow.start_run(run_name="Deployment-Info"):
-         mlflow.log_params({
-             "app_version": "1.0.0",
-             "deployment_platform": "Lightning AI",
-             "deployment_time": datetime.now().isoformat(),
-             "code_version": os.getenv("GIT_COMMIT", "dev")
-         })
-
-     # Start Gradio app
-     demo.launch()
-
- '''
-
  import torch
  import mauve
  from sacrebleu import corpus_bleu
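
After this commit, app.py keeps only the three imports above, which point toward a text-generation evaluation workflow. As a rough orientation, the sketch below shows one common way torch, mauve, and sacrebleu are combined to score generated answers against references; the example data and variable names are illustrative assumptions, not code from this repo.

# Minimal evaluation sketch built on the imports this commit keeps.
# The hypotheses/references below are toy placeholders.
import torch
import mauve
from sacrebleu import corpus_bleu

# Generated answers (hypotheses) and human-written reference answers.
hypotheses = [
    "Lubricate the bearings every 500 hours.",
    "Replace the air filter monthly.",
]
references = [
    "Bearings should be lubricated every 500 operating hours.",
    "The air filter should be replaced once a month.",
]

# sacrebleu computes corpus-level BLEU; it takes a list of hypotheses
# and a list of reference streams (a single stream here).
bleu = corpus_bleu(hypotheses, [references])
print(f"BLEU: {bleu.score:.2f}")

# MAUVE measures distributional similarity between human text (p_text)
# and model text (q_text); device_id=-1 featurizes on CPU. In practice
# MAUVE needs hundreds of samples per side to give a stable score.
device_id = 0 if torch.cuda.is_available() else -1
out = mauve.compute_mauve(
    p_text=references,
    q_text=hypotheses,
    device_id=device_id,
    verbose=False,
)
print(f"MAUVE: {out.mauve:.3f}")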