import torch
import pandas as pd
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from huggingface_hub import login
from pydantic import BaseModel, model_validator  # imported but not used below

# Hugging Face token read from Streamlit secrets
huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
login(huggingface_token)
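# For local runs, st.secrets is populated from .streamlit/secrets.toml
# (on a Hugging Face Space, set the same key as a Space secret instead).
# A minimal sketch of that file, assuming the key name used above:
#
#   # .streamlit/secrets.toml
#   HUGGINGFACEHUB_API_TOKEN = "hf_..."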
# Load Llama 3.2
# model_name = "meta-llama/Llama-3.2-3B-Instruct"
model_name = "meta-llama/Llama-3.2-1B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)  # device_map is a model argument, not a tokenizer one

# Detect whether a GPU is available and set the pipeline device accordingly
device = 0 if torch.cuda.is_available() else -1

# Configure the text-generation pipeline; output length is bounded per call
# via max_new_tokens below, so max_length is not set here
# pipe = pipeline(model=model, tokenizer=tokenizer, max_length=512)  # check documentation without "feature-extraction"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, device=device)

# Wrap the transformers pipeline for LangChain
llm_pipeline = HuggingFacePipeline(pipeline=pipe)
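# llm_pipeline is not used below (the raw `pipe` is called directly), but the
# wrapper can be called through LangChain; a hypothetical usage, not part of
# the app flow (the exact call depends on the installed LangChain version):
#
#   answer = llm_pipeline.invoke("Rank these job titles: ...")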
# Streamlit interface
st.title("Semantic Similarity Ranking with Llama 3.2")

# Initialize the query
query = "aspiring human resources specialist"

# Upload CSV file
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
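# The uploaded CSV must contain a 'job_title' column (checked below). A minimal
# example of a file this app accepts (hypothetical sample data):
#
#   job_title
#   HR Generalist
#   Data Scientist
#   Recruiting Coordinator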
| print("Query: ", query) | |
| if uploaded_file is not None: | |
| df = pd.read_csv(uploaded_file) | |
| if 'job_title' not in df.columns: | |
| st.error("The uploaded CSV must contain a 'job_title' column.") | |
| else: | |
| job_titles = df['job_title'].tolist() | |
| if query: | |
| st.write("Query:", query) | |
            prompt = f"""
You are an AI assistant. You have a list of job titles and a search query.
Your task is to rank these job titles by their semantic similarity to the given query.
Please provide the ranking from most relevant to least relevant.
Do not calculate cosine similarity; instead, focus on understanding the semantic relevance of each job title to the query.
Format your response like this:
1. [Most Relevant Job Title]
2. [Second Most Relevant Job Title]
...
N. [Least Relevant Job Title]
Query: "{query}"
Job Titles: {job_titles}
"""
            # Call the model with the prompt
            try:
                # max_new_tokens alone bounds the generated length (setting
                # max_length at the same time would conflict with it), and
                # return_full_text=False keeps the prompt out of the output
                response = pipe(prompt, max_new_tokens=300, num_return_sequences=1, return_full_text=False)
                # Display the model's response
                st.write("Model Answer:")
                st.write(response[0]['generated_text'])
            except Exception as e:
                st.error(f"Error while processing: {str(e)}")