Create app.py
app.py
ADDED
@@ -0,0 +1,248 @@
import gradio as gr
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

# Configure environment variables and paths
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
os.environ["HF_TOKEN"] = os.environ.get("HF_TOKEN", "")  # Gets the token from Spaces secrets

# Define the path for the Qdrant database
def get_qdrant_path():
    if os.path.exists("/home/user/app"):  # We're on HF Spaces
        return "/home/user/app/qdrant_data"
    else:  # Local environment
        return "/home/filippo/Scrivania/ELAN_bot/qdrant_data"

QDRANT_PATH = get_qdrant_path()

# Function to perform vector search using the existing Qdrant database
def vector_search(query, encoder_model="nomic-ai/nomic-embed-text-v1.5", client_path=None):
    """
    Perform vector search on the Qdrant database and return the relevant context
    """
    if client_path is None:
        client_path = QDRANT_PATH

    try:
        # Get the encoder and client
        encoder = SentenceTransformer(encoder_model, trust_remote_code=True)
        client = QdrantClient(path=client_path)

        # Encode the query
        query_vector = encoder.encode(query).tolist()

        # Perform the search
        hits = client.query_points(
            collection_name="ELAN_docs_pages",
            query=query_vector,
            limit=3,
        ).points

        # Get the context content
        if hits:
            context = "\n".join([hit.payload['content'] for hit in hits])
            return context
        else:
            return "No relevant documentation found."
    except Exception as e:
        print(f"Vector search error: {str(e)}")
        # Fall back to a message if the search fails
        return f"Unable to perform vector search: {str(e)}"
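# Illustrative usage sketch (hypothetical query; assumes the "ELAN_docs_pages"
# collection has already been built at QDRANT_PATH):
#   context = vector_search("How do I create a new tier?")
#   print(context)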

# Function to get the model and tokenizer
def get_llm():
    """
    Initialize and return the Llama model and tokenizer
    """
    # This loads the model from Hugging Face Hub using your token
    model_id = "meta-llama/Llama-3.2-3B-Instruct"

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        token=os.environ["HF_TOKEN"]
    )

    # Load model with memory optimizations (8-bit loading requires the
    # bitsandbytes package and a GPU)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=os.environ["HF_TOKEN"],
        device_map="auto",
        load_in_8bit=True,  # Reduce memory footprint
        torch_dtype=torch.float16
    )

    return model, tokenizer

# Cache model and tokenizer
_model = None
_tokenizer = None

def get_cached_llm():
    """Get or initialize the model and tokenizer"""
    global _model, _tokenizer
    if _model is None or _tokenizer is None:
        # get_llm() returns (model, tokenizer); keep the unpacking order consistent
        _model, _tokenizer = get_llm()
    return _model, _tokenizer
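# Illustrative: repeated calls reuse the cached objects, so the model is only
# loaded once per process:
#   model, tokenizer = get_cached_llm()  # loads on the first call
#   model, tokenizer = get_cached_llm()  # returns the cached instances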

# Function to generate a response to ELAN questions
def generate_response(query):
    """
    Generate a response to a question about ELAN by first searching for relevant context
    """
    # Get context through vector search
    context = vector_search(query)

    # Get model and tokenizer
    try:
        model, tokenizer = get_cached_llm()
    except Exception as e:
        return f"Error loading model: {str(e)}. Make sure you have set up the HF_TOKEN in your Space secrets and have been granted access to the model."

    # Create the system message
    system_prompt = "You are a virtual assistant that helps the user in using an annotation software called ELAN. Your task is to summarize information and guide the user in the usage of the software."

    # Create the user message
    user_prompt = f"""Context: {context}
Question: {query}

Use exclusively the information contained in the provided context to reformulate the text in about 120 words.
Take the provided question into consideration as a reference when formulating the answer.
To be clearer and more concise, use numbered lists when giving instructions.
Make sure the reformulation maintains the original meaning.
In the output, check that there are no grammatical errors. If you find errors, correct them.
Do not add information that is not present in the original text.
In the output, never say that you are summarizing the text."""

    # Format inputs for the Llama-3 chat format
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    try:
        # Convert messages to model input format, appending the assistant
        # header so the model starts a fresh reply
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)

        # Generate response
        with torch.no_grad():
            output = model.generate(
                inputs,
                max_new_tokens=500,
                temperature=0.1,
                do_sample=True,
            )

        # Decode only the newly generated tokens, slicing off the prompt;
        # this is more robust than splitting the full decoded text on the
        # word "assistant", which also appears in the system prompt
        generated_tokens = output[0][inputs.shape[-1]:]
        assistant_response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        return assistant_response

    except Exception as e:
        return f"Error generating response: {str(e)}"
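# Illustrative usage sketch (hypothetical question; requires model access and
# the local Qdrant collection):
#   answer = generate_response("How can I export a transcription to subtitles?")
#   print(answer)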

# Function to modify XML code
def modify_xml(xml_code, instructions):
    """
    Modify XML code according to user instructions
    """
    # Get model and tokenizer
    try:
        model, tokenizer = get_cached_llm()
    except Exception as e:
        return f"Error loading model: {str(e)}. Make sure you have set up the HF_TOKEN in your Space secrets and have been granted access to the model."

    # Create the system message
    system_prompt = "You are a virtual assistant that helps the user in using an annotation software called ELAN. Your task is to modify the given XML code according to the instructions given by the user."

    # Create the user message
    user_prompt = f"""XML code: {xml_code}
Instructions: {instructions}

Modify the provided code according to the instructions given above.
The output should be the modified XML code.
Don't add any additional information or explanations."""

    # Format inputs for the Llama-3 chat format
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    try:
        # Convert messages to model input format, appending the assistant
        # header so the model starts a fresh reply
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)

        # Generate response with greedy decoding for deterministic XML output
        # (temperature is ignored when do_sample=False, so it is omitted)
        with torch.no_grad():
            output = model.generate(
                inputs,
                max_new_tokens=2000,  # Allow for longer XML outputs
                do_sample=False,  # No sampling for XML modification
            )

        # Decode only the newly generated tokens, slicing off the prompt
        generated_tokens = output[0][inputs.shape[-1]:]
        assistant_response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        return assistant_response

    except Exception as e:
        return f"Error modifying XML: {str(e)}"
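# Illustrative usage sketch (hypothetical inputs):
#   new_xml = modify_xml("<TIER TIER_ID='T1'></TIER>",
#                        "Change the tier name from 'T1' to 'Speech'")
#   print(new_xml)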

# Create the Gradio interface
with gr.Blocks(title="ELAN Assistant") as demo:
    gr.Markdown("# ELAN Assistant")
    gr.Markdown("This tool helps you with ELAN annotation software. You can ask questions about ELAN or modify XML code.")

    with gr.Tab("Ask about ELAN"):
        gr.Markdown("Ask any question about how to use ELAN annotation software.")
        with gr.Row():
            question_input = gr.Textbox(label="Your question about ELAN", placeholder="How can I export files in ELAN?", lines=3)
            question_output = gr.Textbox(label="Answer", lines=10)
        question_button = gr.Button("Get Answer")
        question_button.click(fn=generate_response, inputs=question_input, outputs=question_output)

    with gr.Tab("Modify XML"):
        gr.Markdown("Paste your XML code and provide instructions for modifications.")
        with gr.Row():
            xml_input = gr.Textbox(label="Your XML code", placeholder="<annotation>...</annotation>", lines=10)
        with gr.Row():
            instructions_input = gr.Textbox(label="Modification instructions", placeholder="Change the tier name from 'T1' to 'Speech'", lines=3)
        with gr.Row():
            xml_output = gr.Textbox(label="Modified XML", lines=10)
        xml_button = gr.Button("Modify XML")
        xml_button.click(fn=modify_xml, inputs=[xml_input, instructions_input], outputs=xml_output)

    gr.Markdown("### About")
    gr.Markdown("""This application uses Meta's Llama-3.2-3B-Instruct model and vector search to provide accurate information about ELAN annotation software.

**Note:** This application requires access to the Meta-Llama model. Make sure your Hugging Face account has been granted access to the model and you've added your HF_TOKEN to the Space secrets.""")

# Launch the app
if __name__ == "__main__":
    demo.queue().launch()