JaphetHernandez committed on
Commit
b192930
·
verified ·
1 Parent(s): 81b1649

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -41
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
2
  import torch
3
  import pandas as pd
4
  import streamlit as st
@@ -6,53 +6,50 @@ import streamlit as st
6
  # Tu token secreto de Hugging Face
7
  huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
8
 
9
- # Cargar el modelo y tokenizer de un modelo más pequeño de LLaMA, usando el token secreto
10
- model_name = "meta-llama/Llama-3.1-8B" # Cambiado a un modelo más pequeño
11
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=huggingface_token)
12
-
13
-
14
- # Cargar el modelo en formato FP16 para ahorrar memoria
15
  model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=huggingface_token, torch_dtype=torch.float16)
16
 
17
-
18
-
19
  # Usar Streamlit para cargar el archivo CSV
20
  uploaded_file = st.file_uploader("Por favor sube un archivo CSV:", type="csv")
21
 
22
  if uploaded_file is not None:
23
- # Cargar el CSV y extraer la columna 'job_title'
24
  df = pd.read_csv(uploaded_file)
25
- job_title = df['job_title'].tolist()
26
-
27
- # Crear la lista de job titles en formato de texto para el prompt
28
- job_title_text = "\n".join(f"- {title}" for title in job_title)
29
-
30
- # Usar Streamlit para recoger la query del usuario
31
- user_query = st.text_input("Introduce tu query:")
32
-
33
- if user_query:
34
- # Crear el prompt usando los job titles del CSV y la query del usuario
35
- prompt = f"""
36
- You are an advanced AI assistant trained to process job titles and user queries. I will provide you with a list of job titles, and a user query. Your task is to:
37
- 1. Calculate the cosine similarity score between the query and each job title.
38
- 2. Rank the job titles from the most similar to the least similar based on their semantic meaning.
39
- 3. Return the top 5 job titles with their cosine similarity scores.
40
- Here is the list of job titles from the CSV:
41
- {job_title_text}
42
- The user's query is: "{user_query}"
43
- Now, compute the similarity scores, rank the job titles, and return the top 5.
44
- """
45
-
46
- # Tokenizar y generar respuesta
47
- inputs = tokenizer(prompt, return_tensors="pt")
48
-
49
- # Desactivar el cálculo de gradientes para ahorrar memoria
50
- with torch.no_grad():
51
- outputs = model.generate(**inputs, max_new_tokens=200)
52
-
53
- # Decodificar y mostrar resultados
54
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
55
- st.write(response)
 
 
 
56
 
57
  # Limpiar memoria después de la inferencia
58
- torch.cuda.empty_cache() # Si estás usando una GPU
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
  import torch
3
  import pandas as pd
4
  import streamlit as st
 
6
  # Tu token secreto de Hugging Face
7
  huggingface_token = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
8
 
9
+ # Cargar el modelo y tokenizer
10
+ model_name = "meta-llama/Llama-3.1-8B"
11
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=huggingface_token)
 
 
 
12
  model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=huggingface_token, torch_dtype=torch.float16)
13
 
 
 
14
  # Usar Streamlit para cargar el archivo CSV
15
  uploaded_file = st.file_uploader("Por favor sube un archivo CSV:", type="csv")
16
 
17
  if uploaded_file is not None:
 
18
  df = pd.read_csv(uploaded_file)
19
+ if 'job_title' not in df.columns:
20
+ st.error("El archivo CSV debe contener una columna llamada 'job_title'.")
21
+ else:
22
+ job_title = df['job_title'].tolist()
23
+ job_title_text = "\n".join(f"- {title}" for title in job_title)
24
+
25
+ user_query = st.text_input("Introduce tu query:")
26
+
27
+ if user_query:
28
+ prompt = f"""
29
+ You are an advanced AI assistant trained to process job titles and user queries. I will provide you with a list of job titles, and a user query. Your task is to:
30
+ 1. Calculate the cosine similarity score between the query and each job title.
31
+ 2. Rank the job titles from the most similar to the least similar based on their semantic meaning.
32
+ 3. Return the top 5 job titles with their cosine similarity scores.
33
+ Here is the list of job titles from the CSV:
34
+ {job_title_text}
35
+ The user's query is: "{user_query}"
36
+ Now, compute the similarity scores, rank the job titles, and return the top 5.
37
+ """
38
+
39
+ inputs = tokenizer(prompt, return_tensors="pt")
40
+
41
+ try:
42
+ with torch.no_grad():
43
+ outputs = model.generate(**inputs, max_new_tokens=200)
44
+
45
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
46
+
47
+ if response:
48
+ st.write(response)
49
+ else:
50
+ st.error("No se generó ninguna respuesta.")
51
+ except Exception as e:
52
+ st.error(f"Error al generar la respuesta: {e}")
53
 
54
  # Limpiar memoria después de la inferencia
55
+ torch.cuda.empty_cache() # Si estás usando una GPU