import torch
import pandas as pd
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from huggingface_hub import login
from pydantic import BaseModel, model_validator  # imported but not used below

# Hugging Face token read from Streamlit secrets
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
login(huggingface_token)
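# For local runs, st.secrets is populated from .streamlit/secrets.toml
# (on a Hugging Face Space, set the same key as a Space secret instead).
# A minimal sketch of that file, assuming the key name used above:
#
#   # .streamlit/secrets.toml
#   HUGGINGFACEHUB_API_TOKEN = "hf_..."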
# Load Llama 3.2
# model_name = "meta-llama/Llama-3.2-3B-Instruct"
model_name = "meta-llama/Llama-3.2-1B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)  # device_map is a model argument, not a tokenizer one

# Detect whether a GPU is available and set the pipeline device accordingly
device = 0 if torch.cuda.is_available() else -1

# Configure the text-generation pipeline; output length is bounded per call
# via max_new_tokens below, so max_length is not set here
# pipe = pipeline(model=model, tokenizer=tokenizer, max_length=512)  # check documentation without "feature-extraction"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, device=device)

# Wrap the transformers pipeline for LangChain
llm_pipeline = HuggingFacePipeline(pipeline=pipe)
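# llm_pipeline is not used below (the raw `pipe` is called directly), but the
# wrapper can be called through LangChain; a hypothetical usage, not part of
# the app flow (the exact call depends on the installed LangChain version):
#
#   answer = llm_pipeline.invoke("Rank these job titles: ...")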
# Streamlit interface
st.title("Semantic Similarity Ranking with Llama 3.2")

# Initialize the query
query = "aspiring human resources specialist"

# Upload CSV file
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
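# The uploaded CSV must contain a 'job_title' column (checked below). A minimal
# example of a file this app accepts (hypothetical sample data):
#
#   job_title
#   HR Generalist
#   Data Scientist
#   Recruiting Coordinator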
| print("Query: ", query) | |
| if uploaded_file is not None: | |
| df = pd.read_csv(uploaded_file) | |
| if 'job_title' not in df.columns: | |
| st.error("The uploaded CSV must contain a 'job_title' column.") | |
| else: | |
| job_titles = df['job_title'].tolist() | |
| if query: | |
| st.write("Query:", query) | |
            prompt = f"""
You are an AI assistant. You have a list of job titles and a search query.
Your task is to rank these job titles by their semantic similarity to the given query.
Please provide the ranking from most relevant to least relevant.
Do not calculate cosine similarity; instead, focus on understanding the semantic relevance of each job title to the query.
Format your response like this:
1. [Most Relevant Job Title]
2. [Second Most Relevant Job Title]
...
N. [Least Relevant Job Title]
Query: "{query}"
Job Titles: {job_titles}
"""
            # Call the model with the prompt
            try:
                # max_new_tokens alone bounds the generated length (setting
                # max_length at the same time would conflict with it), and
                # return_full_text=False keeps the prompt out of the output
                response = pipe(prompt, max_new_tokens=300, num_return_sequences=1, return_full_text=False)
                # Display the model's response
                st.write("Model Answer:")
                st.write(response[0]['generated_text'])
            except Exception as e:
                st.error(f"Error while processing: {str(e)}")