# Spaces listing header (scrape artifact): "Spaces: No application file"
| from flask import Flask, request, jsonify | |
| from langchain_community.llms import LlamaCpp | |
| import os | |
app = Flask(__name__)

# Single source of truth for the GGUF model file (was hard-coded in two places).
MODEL_PATH = "Phi-3-mini-4k-instruct-q4.gguf"

n_gpu_layers = 0   # CPU-only inference: no layers offloaded to a GPU
n_batch = 1024     # prompt-processing batch size

# Load the local Phi-3-mini model through the llama.cpp bindings.
llm = LlamaCpp(
    model_path=MODEL_PATH,      # path to GGUF file
    temperature=0.1,            # near-deterministic output for extraction tasks
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
    n_ctx=4096,                 # matches the model's 4k context window
)

# Log the model artifact size at startup so logs show what was actually loaded.
file_size = os.stat(MODEL_PATH)
print("model size ====> :", file_size.st_size, "bytes")
# NOTE(review): the original defined this view but never registered a route,
# so the endpoint was unreachable. Path inferred from the function name —
# confirm against the actual clients. POST because it reads a JSON body.
@app.route('/get_skills', methods=['POST'])
def get_skills():
    """Extract the skills mentioned in a CV using the local Phi-3 model.

    Expects a JSON body with a ``cv_body`` field and returns
    ``{'skills': <raw model output>}``.
    """
    cv_body = request.json.get('cv_body')
    # Phi-3 chat template: the CV text forms the user turn; the question
    # primes the assistant turn. Simple inference example.
    output = llm(
        f"<|user|>\n{cv_body}<|end|>\n<|assistant|>Can you list the skills mentioned in the CV?<|end|>",
        max_tokens=256,    # Generate up to 256 tokens
        stop=["<|end|>"],  # stop at the template's end-of-turn marker
        echo=True,         # Whether to echo the prompt
    )
    return jsonify({'skills': output})
if __name__ == '__main__':
    # Start the Flask development server (defaults to 127.0.0.1:5000).
    app.run()
| from flask import Flask, request, jsonify | |
| import nltk | |
| from gensim.models import Word2Vec | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import matplotlib.pyplot as plt | |
| import io | |
| import base64 | |
# Fetch the Punkt tokenizer data required by nltk.word_tokenize.
nltk.download('punkt')

app = Flask(__name__)

# Reference corpus the /process endpoint scores incoming text against.
texts = [
    "This is a sample text.",
    "Another example of text.",
    "More texts to compare.",
]

# Lower-case and tokenize each reference text, then fit a small Word2Vec
# model on that corpus: 100-dim vectors, context window 5, every word kept.
tokenized_texts = [nltk.word_tokenize(text.lower()) for text in texts]
word_embeddings_model = Word2Vec(
    sentences=tokenized_texts,
    vector_size=100,
    window=5,
    min_count=1,
    workers=4,
)
def text_embedding(text):
    """Return the mean Word2Vec vector of *text*'s known tokens.

    Falls back to a zero vector when no token appears in the model's
    vocabulary, so callers always receive a fixed-size embedding.
    """
    tokens = nltk.word_tokenize(text.lower())
    vocab = word_embeddings_model.wv
    known_vectors = [vocab[token] for token in tokens if token in vocab]
    if not known_vectors:
        return np.zeros(word_embeddings_model.vector_size)
    return np.mean(known_vectors, axis=0)
# NOTE(review): the original defined this view but never registered a route,
# so the endpoint was unreachable. Path inferred from the function name —
# confirm against the actual clients. POST because it reads a JSON body.
@app.route('/process', methods=['POST'])
def process():
    """Score posted text against the reference corpus.

    Expects JSON ``{'input_text': str}``. Returns per-text cosine-similarity
    percentages, a base64-encoded PNG bar chart, and the top-3 most similar
    reference texts; 400 when no input text is provided.
    """
    data = request.get_json()
    input_text = data.get('input_text', '')
    if not input_text:
        return jsonify({'error': 'No input text provided'}), 400

    # Embed the input and every reference text, then score with cosine similarity.
    input_embedding = text_embedding(input_text)
    text_embeddings = [text_embedding(text) for text in texts]
    similarities = cosine_similarity([input_embedding], text_embeddings).flatten()
    # Cast numpy scalars to plain floats — the stdlib JSON encoder used by
    # jsonify cannot serialize numpy float types.
    similarities_percentages = [float(similarity) * 100 for similarity in similarities]

    # Bar chart of similarity per reference text.
    # NOTE(review): matplotlib inside a server handler usually requires the
    # non-interactive 'Agg' backend (matplotlib.use('Agg')) — confirm deployment.
    fig, ax = plt.subplots(figsize=(10, 6))
    texts_for_plotting = [f"Text {i+1}" for i in range(len(texts))]
    ax.bar(texts_for_plotting, similarities_percentages)
    ax.set_ylabel('Similarity (%)')
    ax.set_xlabel('Texts')
    ax.set_title('Similarity of Input Text with other texts')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Serialize the figure to base64 PNG for the JSON response.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode('utf-8')
    # Close the figure we created (bare plt.close() only targets the "current"
    # figure, which can leak under concurrent requests).
    plt.close(fig)

    # Top-3 most similar texts, highest first; cast scores for JSON as above.
    sorted_indices = np.argsort(similarities)[::-1]
    similar_texts = [(float(similarities[idx]) * 100, texts[idx]) for idx in sorted_indices[:3]]

    response = {
        'similarities': similarities_percentages,
        'plot': img_base64,
        'most_similar_texts': similar_texts,
    }
    return jsonify(response)
if __name__ == '__main__':
    # Expose the dev server on all interfaces, port 8080.
    # NOTE(review): debug=True enables the Werkzeug interactive debugger
    # (arbitrary code execution) — must be disabled in production.
    app.run(host='0.0.0.0', port=8080, debug=True)