Spaces:

Wedyan2023
/

Data_Generation_LabelingCopy

Sleeping

App Files

Wedyan2023 committed on May 14

Commit

d8fa7c4

verified ·

1 Parent(s): aebaf84

Update app110.py

Browse files

Files changed (1) hide show

app110.py +24 -564

app110.py CHANGED Viewed

@@ -47,124 +47,8 @@ completion = client.chat.completions.create(
 )
 print(completion.choices[0].message)
-#######
-#####
-# from openai import OpenAI
-# client = OpenAI(
-#     base_url="https://router.huggingface.co/together/v1",
-#     #api_key="hf_XXXXX",
-#     api_key=os.environ.get('TOKEN2'), # Hugging Face API token
-# )
-# #meta-llama/Meta-Llama-3-8B-Instruct
-# completion = client.chat.completions.create(
-#     #model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-#     model="meta-llama/Meta-Llama-3-8B-Instruct",
-#     messages=[
-#         {
-#             "role": "user",
-#             "content": "What is the capital of France?"
-#         }
-#     ],
-# )
-#print(completion.choices[0].message)
-#####
-##########################################################3
-# import streamlit as st
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import torch
-# # Model selection dropdown
-# selected_model = st.selectbox(
-#     "Select Model",
-#     ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-#      "meta-llama/Llama-3.3-70B-Instruct",
-#      "meta-llama/Llama-3.2-3B-Instruct",
-#      "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-#      "meta-llama/Meta-Llama-3-8B-Instruct",
-#      "meta-llama/Llama-3.1-70B-Instruct"],
-#     key='model_select'
-# )
-# @st.cache_resource  # Cache the model to prevent reloading
-# def load_model(model_name):
-#     try:
-#         # Optimized model loading configuration
-#         model = AutoModelForCausalLM.from_pretrained(
-#             model_name,
-#             torch_dtype=torch.float16,        # Use half precision
-#             device_map="auto",                # Automatic device mapping
-#             load_in_8bit=True,               # Enable 8-bit quantization
-#             low_cpu_mem_usage=True,          # Optimize CPU memory usage
-#             max_memory={0: "10GB"}           # Limit GPU memory usage
-#         )
-#         tokenizer = AutoTokenizer.from_pretrained(
-#             model_name,
-#             padding_side="left",
-#             truncation_side="left"
-#         )
-#         return model, tokenizer
-#     except Exception as e:
-#         st.error(f"Error loading model: {str(e)}")
-#         return None, None
-# # Load the selected model with optimizations
-# if selected_model:
-#     model, tokenizer = load_model(selected_model)
-#     # Check if model loaded successfully
-#     if model is not None:
-#         st.success(f"Successfully loaded {selected_model}")
-#     else:
-#         st.warning("Please select a different model or check your hardware capabilities")
-# # Function to generate text
-# def generate_response(prompt, model, tokenizer):
-#     try:
-#         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-#         with torch.no_grad():
-#             outputs = model.generate(
-#                 inputs["input_ids"],
-#                 max_length=256,
-#                 num_return_sequences=1,
-#                 temperature=0.7,
-#                 do_sample=True,
-#                 pad_token_id=tokenizer.pad_token_id
-#             )
-#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-#         return response
-#     except Exception as e:
-#         return f"Error generating response: {str(e)}"
-############################################################
-####new
-# from openai import OpenAI
-# client = OpenAI(
-#     base_url="https://router.huggingface.co/together/v1",
-#     api_key=os.environ.get('TOKEN2'),
-# )
-# completion = client.chat.completions.create(
-#     model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-#     messages=[
-#         {
-#             "role": "user",
-#             "content": "What is the capital of France?"
-#         }
-#     ],
-#     max_tokens=512,
-# )
-# print(completion.choices[0].message)
-#####
 # Create necessary directories
 for dir_name in ['data', 'feedback']:
@@ -228,14 +112,7 @@ def read_csv_with_encoding(file):
             continue
     raise UnicodeDecodeError("Failed to read file with any supported encoding")
-#def save_feedback(feedback_data):
-    #feedback_file = 'feedback/user_feedback.csv'
-    #feedback_df = pd.DataFrame([feedback_data])
-    #if os.path.exists(feedback_file):
-        #feedback_df.to_csv(feedback_file, mode='a', header=False, index=False)
-    #else:
-        #feedback_df.to_csv(feedback_file, index=False)
 def reset_conversation():
     st.session_state.conversation = []
@@ -259,16 +136,7 @@ if "system_role" not in st.session_state:
 # Main app title
 st.title("🤖🦙 Text Data Labeling and Generation App")
-# def embed_pdf_sidebar(pdf_path):
-#     with open(pdf_path, "rb") as f:
-#         base64_pdf = base64.b64encode(f.read()).decode('utf-8')
-#     pdf_display = f"""
-#         <iframe src="data:application/pdf;base64,{base64_pdf}"
-#         width="100%" height="400" type="application/pdf"></iframe>
-#     """
-#     st.markdown(pdf_display, unsafe_allow_html=True)
-#
 # Sidebar settings
 with st.sidebar:
@@ -295,84 +163,7 @@ with st.sidebar:
         key='model_select'
     )
-#################new oooo
-# # Model selection dropdown
-# selected_model = st.selectbox(
-#     "Select Model",
-#     [#"meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-#      "meta-llama/Llama-3.2-3B-Instruct",
-#      "meta-llama/Llama-3.3-70B-Instruct",
-#      "meta-llama/Llama-3.2-3B-Instruct",
-#      "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-#      "meta-llama/Meta-Llama-3-8B-Instruct",
-#      "meta-llama/Llama-3.1-70B-Instruct"],
-#     key='model_select'
-# )
-# @st.cache_resource  # Cache the model to prevent reloading
-# def load_model(model_name):
-#     try:
-#         # Optimized model loading configuration
-#         model = AutoModelForCausalLM.from_pretrained(
-#             model_name,
-#             torch_dtype=torch.float16,        # Use half precision
-#             device_map="auto",                # Automatic device mapping
-#             load_in_8bit=True,               # Enable 8-bit quantization
-#             low_cpu_mem_usage=True,          # Optimize CPU memory usage
-#             max_memory={0: "10GB"}           # Limit GPU memory usage
-#         )
-#         tokenizer = AutoTokenizer.from_pretrained(
-#             model_name,
-#             padding_side="left",
-#             truncation_side="left"
-#         )
-#         return model, tokenizer
-#     except Exception as e:
-#         st.error(f"Error loading model: {str(e)}")
-#         return None, None
-# # Load the selected model with optimizations
-# if selected_model:
-#     model, tokenizer = load_model(selected_model)
-#     # Check if model loaded successfully
-#     if model is not None:
-#         st.success(f"Successfully loaded {selected_model}")
-#     else:
-#         st.warning("Please select a different model or check your hardware capabilities")
-# # Function to generate text
-# def generate_response(prompt, model, tokenizer):
-#     try:
-#         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-#         with torch.no_grad():
-#             outputs = model.generate(
-#                 inputs["input_ids"],
-#                 max_length=256,
-#                 num_return_sequences=1,
-#                 temperature=0.7,
-#                 do_sample=True,
-#                 pad_token_id=tokenizer.pad_token_id
-#             )
-#         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-#         return response
-#     except Exception as e:
-#         return f"Error generating response: {str(e)}"
-# ################
-# model = AutoModelForCausalLM.from_pretrained(
-#     "meta-llama/Meta-Llama-3-8B-Instruct",
-#     torch_dtype=torch.float16,  # Use half precision
-#     device_map="auto",          # Automatic device mapping
-#     load_in_8bit=True          # Load in 8-bit precision
-# )
     temperature = st.slider(
         "Temperature",
         0.0, 1.0, 0.7,
@@ -615,21 +406,7 @@ if "task_choice" in st.session_state:
              )
          )
-        #    template=(
-        #         "{system_role}\n"
-        #         "- Use the following parameters:\n"
-        #         "- Generate {num_examples} examples\n"
-        #         "- Each example should be between {min_words} to {max_words} words long\n"
-        #         "- Use these labels: {labels}.\n"
-        #         "- Use the following additional attributes:\n"
-        #         "{additional_attributes}\n"
-        #         #"- Format each example like this: 'Example text. Label: [label]. Attribute1: [topic1]. Attribute2: [topic2]'\n"
-        #         "- Generate the examples in this format: 'Example text. Label: label'\n"
-        #         "- Additional instructions: {user_prompt}\n"
-        #         "- Use these few-shot examples if provided:\n{few_shot_examples}\n"
-        #         "- Think step by step and ensure examples are unique and not repeated."
-        #     )
-        # )
         ##########new 22/4/2025
         formatted_attributes = "\n".join([
             f"- {attr['attribute']}: {', '.join(attr['topics'])}" for attr in additional_attributes
@@ -669,16 +446,7 @@ if "task_choice" in st.session_state:
                 st.warning("Class names must be unique.")
             elif any(not lbl.strip() for lbl in labels):
                 st.warning("All class labels must be filled in.")
-            #else:
-                #st.success("Generating examples for domain: {domain}")
-            #if not custom_domain_valid:
-                #st.warning("Custom domain name is required.")
-            #elif not labels_valid:
-                #st.warning("Please fix the label errors before generating examples.")
-            #else:
-                # Proceed to generate examples
-                #st.success(f"Generating examples for domain: {domain}")
             with st.spinner("Generating examples..."):
                 try:
@@ -694,7 +462,7 @@ if "task_choice" in st.session_state:
                         #frequency_penalty=0.5,      # Discourages frequent words
                         #presence_penalty=0.6,
                     )
- #st.session_state['system_prompt'] = system_prompt
                     #new 24 march
                     st.session_state.messages.append({"role": "user", "content": system_prompt})
                  # # ####################
@@ -734,18 +502,7 @@ if "task_choice" in st.session_state:
                                         'Use few-shot example?': 'Yes' if use_few_shot else 'No',
                                     })
-                                    # example_dict = {
-                                    #     'text': text,
-                                    #     'label': label,
-                                    #     'system_prompt': st.session_state.system_prompt,
-                                    #     'system_role': st.session_state.system_role,
-                                    #     'task_type': 'Data Generation',
-                                    #     'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                                    # }
-                                    # for attr in additional_attributes:
-                                    #     example_dict[attr['attribute']] = random.choice(attr['topics'])
-                                    # examples_list.append(example_dict)
                     if examples_list:
@@ -778,9 +535,9 @@ if "task_choice" in st.session_state:
                             "application/json",
                             key='download-json-persistent'
                         )
-                        # # Display the labeled examples
-                        # st.markdown("##### 📋 Labeled Examples Preview")
-                        # st.dataframe(df, use_container_width=True)
                     if st.button("Continue"):
                         if follow_up == "Generate more examples":
@@ -1003,78 +760,8 @@ if "task_choice" in st.session_state:
             if not labels:
                 st.warning("Please select at least one entity type.")
-                labels = ["PERSON"]
-            ##########
-            # # Extract just the entity type (before the dash)
-            # labels = [entity.split(" - ")[0] for entity in selected_entities]
-            # if not labels:
-            #     st.warning("Please select at least one entity type")
-            #     labels = ["PERSON"]  # Default if nothing selected
-    #NNew edit
-            # elif classification_type == "Multi-Class Classification":
-            #     st.write("### Multi-Class Classification Labels")
-            #     default_labels_by_domain = {
-            #         "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
-            #         "AG News": ["World", "Sports", "Business", "Sci/Tech"],
-            #         "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
-            #                     "Food & Dining", "Local Experience", "Adventure Activities",
-            #                     "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
-            #                     "Luxury Tourism"],
-            #         "Restaurant reviews": ["Italian", "French", "American"]
-            #     }
-            #     num_classes = st.slider("Number of classes", 3, 10, 3)
-            #     # Get defaults for selected domain, or empty list
-            #     defaults = default_labels_by_domain.get(domain, [])
-            #     labels = []
-            #     errors = []
-            #     cols = st.columns(3)
-            #     for i in range(num_classes):
-            #         with cols[i % 3]:
-            #             default_value = defaults[i] if i < len(defaults) else ""
-            #             label_input = st.text_input(f"Class {i+1}", default_value)
-            #             normalized_label = label_input.strip().title()
-            #             if not normalized_label:
-            #                 errors.append(f"Class {i+1} name is required.")
-            #             else:
-            #                 labels.append(normalized_label)
-            #     # Check for duplicates (case-insensitive)
-            #     if len(labels) != len(set(labels)):
-                #     errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
-                # # Show validation results
-                # if errors:
-                #     for error in errors:
-                #         st.error(error)
-                # else:
-                #     st.success("All Labels names are valid and unique!")
-                # labels_valid = not errors  # Will be True only if there are no label errors
-        # else:
-        #     num_classes = st.slider("Number of classes", 3, 23, 3, key="label_num_classes")
-        #     labels = []
-        #     cols = st.columns(3)
-        #     for i in range(num_classes):
-        #         with cols[i % 3]:
-        #             label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
-        #             labels.append(label)
         use_few_shot = st.toggle("Use few-shot examples for labeling")
         few_shot_examples = []
         if use_few_shot:
@@ -1127,78 +814,8 @@ if "task_choice" in st.session_state:
         # Customize prompt template based on classification type
         if classification_type == "Named Entity Recognition (NER)":
-            # label_prompt_template = PromptTemplate(
-            #     input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
-            #     template=(
-            #         "{system_role}\n"
-            #         #"- You are a professional Named Entity Recognition (NER) expert in {domain} domain. Your role is to identify and extract the following entity types: {labels}.\n"
-            #         "- For each text example provided, identify all entities of the requested types.\n"
-            #         "- Use the following entities: {labels}.\n"
-            #         "- Return each example followed by the entities you found in this format: 'Example text.\n \n Entities:\n [ENTITY_TYPE: entity text\n\n, ENTITY_TYPE: entity text\n\n, ...] or [No entities found]'\n"
-            #         "- If no entities of the requested types are found, indicate 'No entities found' in this text.\n"
-            #         "- Be precise about entity boundaries - don't include unnecessary words.\n"
-            #         "- Do not provide any additional information or explanations.\n"
-            #         "- Additional instructions:\n {user_prompt}\n\n"
-            #         "- Use user few-shot examples as guidance if provided:\n{few_shot_examples}\n\n"
-            #         "- Examples to analyze:\n{examples}\n\n"
-            #         "Output:\n"
-            #     )
-            # )
-            #new 22/4/2025
-            # label_prompt_template = PromptTemplate(
-            #     input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
-            #     template=(
-            #         "{system_role}\n"
-            #         "- You are performing Named Entity Recognition (NER) in the domain of {domain}.\n"
-            #         "- Use the following entity types: {labels}.\n\n"
-            #         "### Reasoning Steps:\n"
-            #         "1. Read the example carefully.\n"
-            #         "2. For each named entity mentioned, determine its meaning and role in the sentence.\n"
-            #         "3. Think about the **context**: Is it a physical location (LOC)? A geopolitical region (GPE)? A person (PERSON)?\n"
-            #         "4. Based on the definition of each label, assign the most **specific and correct** label.\n\n"
-            #         "For example:\n"
-            #         "- 'Mount Everest' → LOC (it's a mountain)\n"
-            #         "- 'France' → GPE (it's a country)\n"
-            #         "- 'Microsoft' → ORG\n"
-            #         "- 'John Smith' → PERSON\n\n"
-            #         "- Return each example followed by the entities you found in this format:\n"
-            #         "'Example text.'\nEntities: [ENTITY_TYPE: entity text, ENTITY_TYPE: entity text, ...] or [No entities found]\n"
-            #         "- If no entities of the requested types are found, return 'No entities found'.\n"
-            #         "- Be precise about entity boundaries - don't include extra words.\n"
-            #         "- Do not explain or justify your answers.\n\n"
-            #         "Additional instructions:\n{user_prompt}\n\n"
-            #         "Few-shot examples:\n{few_shot_examples}\n\n"
-            #         "Examples to label:\n{examples}\n"
-            #         "Output:\n"
-            #     )
-            #)
-            # label_prompt_template = PromptTemplate(
-            #     input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
-            #     template=(
-            #         "{system_role}\n"
-            #         "- You are an expert at Named Entity Recognition (NER) for domain: {domain}.\n"
-            #         "- Use these entity types: {labels}.\n\n"
-            #         "### Output Format:\n"
-            #         # "Return each example followed by the entities you found in this format: 'Example text.\n Entities:\n [ENTITY_TYPE: entity text\n\"
-            #         "Return each example followed by the entities you found in this format: 'Example text.\n 'Entity types:\n "Then group the entities under each label like this:\n" "
-            #         #"Then Start with this line exactly: 'Entity types\n'\n"
-            #         #"Then group the entities under each label like this:\n"
-            #         "\n PERSON – Angela Merkel, John Smith\n\n"
-            #         "\ ORG – Google, United Nations\n\n"
-            #         "\n DATE – January 1st, 2023\n\n"
-            #         "\n ... and so on.\n\n"
-            #         "If entity {labels} not found, do not write it in your response\n"
-            #         "- Do NOT output them inline after the text.\n"
-            #         "- Do NOT repeat the sentence.\n"
-            #         "- If no entities are found for a type, skip it.\n"
-            #         "- Keep the format consistent.\n\n"
-            #         "User Instructions:\n{user_prompt}\n\n"
-            #         "Few-shot Examples:\n{few_shot_examples}\n\n"
-            #         "Examples to analyze:\n{examples}"
-            #       )
-            # )
             label_prompt_template = PromptTemplate(
                 input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
                 template=(
@@ -1257,20 +874,7 @@ if "task_choice" in st.session_state:
             formatted_few_shot = "\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples])
         else:
             formatted_few_shot = ""
-# #new 22/4/2025
-#         few_shot_examples = [
-#             {"content": "Mount Everest is the tallest mountain in the world.", "label": "LOC: Mount Everest"},
-#             {"content": "The President of the United States visited Paris last summer.", "label": "GPE: United States, GPE: Paris"},
-#             {"content": "Amazon is expanding its offices in Berlin.", "label": "ORG: Amazon, GPE: Berlin"},
-#             {"content": "J.K. Rowling wrote the Harry Potter books.", "label": "PERSON: J.K. Rowling"},
-#             {"content": "Apple was founded in California in 1976.", "label": "ORG: Apple, GPE: California, DATE: 1976"},
-#             {"content": "The Nile is the longest river in Africa.", "label": "LOC: Nile, GPE: Africa"},
-#             {"content": "He arrived at 3 PM for the meeting.", "label": "TIME: 3 PM"},
-#             {"content": "She bought the dress for $200.", "label": "MONEY: $200"},
-#             {"content": "The event is scheduled for July 4th.", "label": "DATE: July 4th"},
-#             {"content": "The World Health Organization is headquartered in Geneva.", "label": "ORG: World Health Organization, GPE: Geneva"}
-#         ]
-# ###########
 # new 22/4/2025
         #formatted_few_shot = "\n".join([f"{ex['content']}\nEntities: [{ex['label']}]" for ex in few_shot_examples])
         formatted_few_shot = "\n\n".join([f"{ex['content']}\n\nEntity types\n{ex['label']}" for ex in few_shot_examples])
@@ -1308,69 +912,8 @@ if "task_choice" in st.session_state:
                             few_shot_examples=few_shot_text,
                             examples=examples_text,
                             user_prompt=user_prompt
-                    #new
-                            #'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                       )
-                    # if classification_type == "Named Entity Recognition (NER)":
-                    #     # Step 1: Split the full response by example
-                    #     raw_outputs = [block.strip() for block in response.strip().split("Entity types") if block.strip()]
-                    #     inputs = [ex.strip() for ex in examples_to_classify]
-                    #     # Step 2: Match inputs with NER output blocks
-                    #     labeled_examples = []
-                    #     for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
-                    #         labeled_examples.append({
-                    #             'text': text,
-                    #             'entities': f"Entity types\n{output_block.strip()}",
-                    #             'system_prompt': st.session_state.system_prompt,
-                    #             'system_role': st.session_state.system_role,
-                    #             'task_type': 'Named Entity Recognition (NER)',
-                    #             'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                    #         })
-                    # if classification_type == "Named Entity Recognition (NER)":
-                    #     # Step 1: Split the full response by example
-                    #     raw_outputs = [block.strip() for block in response.strip().split("Entity types") if block.strip()]
-                    #     inputs = [ex.strip() for ex in examples_to_classify]
-                    #     # Step 2: Match inputs with NER output blocks
-                    #     labeled_examples = []
-                    #     for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
-                    #         labeled_examples.append({
-                    #             'text': text,
-                    #             'entities': f"Entity types\n{output_block.strip()}",
-                    #             'system_prompt': st.session_state.system_prompt,
-                    #             'system_role': st.session_state.system_role,
-                    #             'task_type': 'Named Entity Recognition (NER)',
-                    #             'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                    #         })
-                    # import re
-                    # if classification_type == "Named Entity Recognition (NER)":
-                    #     # Use regex to split on "Entity types" while keeping it attached to each block
-                    #     blocks = re.split(r"(Entity types)", response.strip())
-                    #     # Recombine 'Entity types' with each block after splitting
-                    #     raw_outputs = [
-                    #         (blocks[i] + blocks[i+1]).strip()
-                    #         for i in range(1, len(blocks) - 1, 2)
-                    #     ]
-                    #     inputs = [ex.strip() for ex in examples_to_classify]
-                    #     labeled_examples = []
-                    #     for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
-                    #         labeled_examples.append({
-                    #             'text': text,
-                    #             'entities': output_block,
-                    #             'system_prompt': st.session_state.system_prompt,
-                    #             'system_role': st.session_state.system_role,
-                    #             'task_type': 'Named Entity Recognition (NER)',
-                    #             'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                    #         })
                     else:
                         system_prompt = label_prompt_template.format(
@@ -1399,16 +942,7 @@ if "task_choice" in st.session_state:
                         #################
                         response = st.write_stream(stream)
                         st.session_state.messages.append({"role": "assistant", "content": response})
-                         # Display the labeled examples
-                        #    # Optional: If you want to add it as a chat-style message log
-                        # preview_str = st.session_state.labeled_preview.to_markdown(index=False)
-                        # st.session_state.messages.append({"role": "assistant", "content": f"Here is a preview of the labeled examples:\n\n{preview_str}"})
-                        # # Stream response and append assistant message
-                        # #14/4/2024
-                        # response = st.write_stream(stream)
-                        # st.session_state.messages.append({"role": "assistant", "content": response})
                           # Initialize session state variables if they don't exist
                         if 'system_prompt' not in st.session_state:
@@ -1427,28 +961,6 @@ if "task_choice" in st.session_state:
                             st.session_state.generated_examples_json = None
-                        # Save labeled examples to CSV
-                        #new 14/4/2025
-                        #labeled_examples = []
-                        # if classification_type == "Named Entity Recognition (NER)":
-                        #     labeled_examples = []
-                        #     for line in response.split('\n'):
-                        #         if line.strip():
-                        #             parts = line.rsplit('Entities:', 1)
-                        #             if len(parts) == 2:
-                        #                 text = parts[0].strip()
-                        #                 entities = parts[1].strip()
-                        #                 if text and entities:
-                        #                     labeled_examples.append({
-                        #                         'text': text,
-                        #                         'entities': entities,
-                        #                         'system_prompt': st.session_state.system_prompt,
-                        #                         'system_role': st.session_state.system_role,
-                        #                         'task_type': 'Named Entity Recognition (NER)',
-                        #                         'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                        #                     })
                                             #new 22/4/2025
                         labeled_examples = []
@@ -1504,25 +1016,6 @@ if "task_choice" in st.session_state:
                                 "examples": labeled_examples
                             }, indent=2).encode('utf-8')
-                            ############
-                            # CSV
-                            # st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
-                            # # JSON
-                            # st.session_state.labeled_examples_json = json.dumps({
-                            #     "metadata": {
-                            #         "domain": domain,
-                            #         "labels": labels,
-                            #         "used_few_shot": use_few_shot,
-                            #         "task_type": "Named Entity Recognition (NER)",
-                            #         "timestamp": datetime.now().isoformat()
-                            #     },
-                            #     "examples": labeled_examples
-                            # }, indent=2).encode('utf-8')
-                                                        ########
-                            # st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
-                            # st.session_state.labeled_examples_json = json.dumps(labeled_examples, indent=2).encode('utf-8')
                             # Download buttons
                             st.download_button(
@@ -1547,45 +1040,12 @@ if "task_choice" in st.session_state:
                             # Display the labeled examples
                             st.markdown("##### 📋 Labeled Examples Preview")
                             st.dataframe(df, use_container_width=True)
-                            # Display section
-                            #st.markdown("### 📋 Labeled Examples Preview")
-                            #st.dataframe(st.session_state.labeled_preview, use_container_width=True)
-                        # if labeled_examples:
-                        #     df = pd.DataFrame(labeled_examples)
-                        #     csv = df.to_csv(index=False).encode('utf-8')
-                        #     st.download_button(
-                        #         "📥 Download Labeled Examples",
-                        #         csv,
-                        #         "labeled_examples.csv",
-                        #         "text/csv",
-                        #         key='download-labeled-csv'
-                        #     )
-                        # # Add space and center the "or"
-                        # st.markdown("""
-                        # <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . .         or</div>
-                        # """, unsafe_allow_html=True)
-                        # if labeled_examples:
-                        #     df = pd.DataFrame(labeled_examples)
-                        #     csv = df.to_csv(index=False).encode('utf-8')
-                        #     st.download_button(
-                        #         "📥 Download Labeled Examples",
-                        #         csv,
-                        #         "labeled_examples.json",
-                        #         "text/json",
-                        #         key='download-labeled-JSON'
-                        #     )
-                        # Add follow-up interaction options
-                        #st.markdown("---")
-                        #follow_up = st.radio(
-                            #"What would you like to do next?",
-                             #["Label more data", "Data Generation"],
-                           # key="labeling_follow_up"
-                      #  )
                         if st.button("Continue"):
                             if follow_up == "Label more data":

 )
 print(completion.choices[0].message)
 # Create necessary directories
 for dir_name in ['data', 'feedback']:
             continue
     raise UnicodeDecodeError("Failed to read file with any supported encoding")
+#
 def reset_conversation():
     st.session_state.conversation = []
 # Main app title
 st.title("🤖🦙 Text Data Labeling and Generation App")
 # Sidebar settings
 with st.sidebar:
         key='model_select'
     )
     temperature = st.slider(
         "Temperature",
         0.0, 1.0, 0.7,
              )
          )
         ##########new 22/4/2025
         formatted_attributes = "\n".join([
             f"- {attr['attribute']}: {', '.join(attr['topics'])}" for attr in additional_attributes
                 st.warning("Class names must be unique.")
             elif any(not lbl.strip() for lbl in labels):
                 st.warning("All class labels must be filled in.")
             with st.spinner("Generating examples..."):
                 try:
                         #frequency_penalty=0.5,      # Discourages frequent words
                         #presence_penalty=0.6,
                     )
                     #new 24 march
                     st.session_state.messages.append({"role": "user", "content": system_prompt})
                  # # ####################
                                         'Use few-shot example?': 'Yes' if use_few_shot else 'No',
                                     })
                     if examples_list:
                             "application/json",
                             key='download-json-persistent'
                         )
+                        # Display the labeled examples
+                        st.markdown("##### 📋 Labeled Examples Preview")
+                        st.dataframe(df, use_container_width=True)
                     if st.button("Continue"):
                         if follow_up == "Generate more examples":
             if not labels:
                 st.warning("Please select at least one entity type.")
+                labels = ["PERSON"]
         use_few_shot = st.toggle("Use few-shot examples for labeling")
         few_shot_examples = []
         if use_few_shot:
         # Customize prompt template based on classification type
         if classification_type == "Named Entity Recognition (NER)":
             label_prompt_template = PromptTemplate(
                 input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
                 template=(
             formatted_few_shot = "\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples])
         else:
             formatted_few_shot = ""
 # new 22/4/2025
         #formatted_few_shot = "\n".join([f"{ex['content']}\nEntities: [{ex['label']}]" for ex in few_shot_examples])
         formatted_few_shot = "\n\n".join([f"{ex['content']}\n\nEntity types\n{ex['label']}" for ex in few_shot_examples])
                             few_shot_examples=few_shot_text,
                             examples=examples_text,
                             user_prompt=user_prompt
                     else:
                         system_prompt = label_prompt_template.format(
                         #################
                         response = st.write_stream(stream)
                         st.session_state.messages.append({"role": "assistant", "content": response})
                           # Initialize session state variables if they don't exist
                         if 'system_prompt' not in st.session_state:
                             st.session_state.generated_examples_json = None
                                             #new 22/4/2025
                         labeled_examples = []
                                 "examples": labeled_examples
                             }, indent=2).encode('utf-8')
                             # Download buttons
                             st.download_button(
                             # Display the labeled examples
                             st.markdown("##### 📋 Labeled Examples Preview")
                             st.dataframe(df, use_container_width=True)
+                            # Display section
+                            st.markdown("### 📋 Labeled Examples Preview")
+                            st.dataframe(st.session_state.labeled_preview, use_container_width=True)
                         if st.button("Continue"):
                             if follow_up == "Label more data":