| | import streamlit as st |
| | import fitz |
| | from sentence_transformers import SentenceTransformer, util |
| | import faiss |
| | from transformers import pipeline |
| | import os |
| | from pathlib import Path |
| |
|
# Page heading rendered at the top of the app.
st.title("Évaluation Stagiaire Data Scientist")

# Upload widget limited to PDF files; the rest of the script only runs its
# pipeline when this value is not None.
uploaded_file = st.file_uploader(label="Choisissez un fichier PDF", type=["pdf"])
| |
|
def save_uploaded_file(uploaded_file, directory):
    """Write an uploaded file into *directory* and return the saved path.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getbuffer()`` (the raw content as a bytes-like
            object).
        directory: Destination folder; created, with parents, when missing.

    Returns:
        ``pathlib.Path`` of the file written directly inside ``directory``.

    Raises:
        ValueError: If the upload name has no usable file-name component.
    """
    directory = Path(directory)
    directory.mkdir(parents=True, exist_ok=True)

    # The name is chosen by the client, so keep only its last component:
    # a value such as "../../app.py" must never escape ``directory``.
    # Backslashes are normalised so Windows-style paths are stripped too.
    safe_name = Path(str(uploaded_file.name).replace("\\", "/")).name
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    file_path = directory / safe_name
    file_path.write_bytes(uploaded_file.getbuffer())
    return file_path
| |
|
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        A single string made of each page's ``get_text()`` output; empty
        when no page yields text.
    """
    # The context manager closes the document even if a page fails to
    # parse, so the underlying file handle is not leaked.
    with fitz.open(pdf_path) as pdf_document:
        return "".join(page.get_text() for page in pdf_document)
| |
|
def index_document(text, index_path='document_index.faiss', model_name='paraphrase-MiniLM-L6-v2'):
    """Embed *text* as one vector and persist it in a flat L2 FAISS index.

    The whole text is encoded as a single document, so the resulting index
    holds exactly one vector.

    Args:
        text: Document text to embed.
        index_path: Where the index file is written. Defaults to the
            original hard-coded ``'document_index.faiss'``.
        model_name: SentenceTransformer model used for the embedding.

    Returns:
        The in-memory ``faiss.IndexFlatL2`` that was written to disk, so a
        caller can query it without re-reading the file.
    """
    model = SentenceTransformer(model_name)
    # Ask for a numpy array directly (no torch round-trip) and make the
    # float32 dtype explicit before handing the matrix to FAISS.
    document_embeddings = model.encode([text], convert_to_numpy=True)
    document_embeddings = document_embeddings.astype("float32", copy=False)

    index = faiss.IndexFlatL2(document_embeddings.shape[1])
    index.add(document_embeddings)
    # str() lets callers pass a pathlib.Path as well as a plain string.
    faiss.write_index(index, str(index_path))
    return index
| |
|
def get_answer_from_document(question, context):
    """Run extractive question answering for *question* over *context*.

    A ``deepset/roberta-base-squad2`` question-answering pipeline is built
    on each call and its raw result is returned unchanged.

    Args:
        question: The question to answer.
        context: Text in which the answer is searched.

    Returns:
        Whatever the pipeline returns for a single question/context pair
        (presumably a dict with ``answer`` and ``score`` keys — confirm
        against the transformers documentation).
    """
    extractive_qa = pipeline(
        task='question-answering',
        model='deepset/roberta-base-squad2',
    )
    return extractive_qa(question=question, context=context)
| |
|
def generate_questions(text, num_questions=5, num_beams=5):
    """Generate up to *num_questions* questions about *text* with T5.

    Args:
        text: Source text the questions should be about.
        num_questions: Number of sequences requested from the model.
        num_beams: Requested beam width; widened automatically when it is
            smaller than ``num_questions``.

    Returns:
        List of generated question strings; empty when ``text`` is blank or
        ``num_questions`` is not positive (the model is not loaded then).
    """
    # Nothing to generate: skip the expensive model load entirely.
    if num_questions <= 0 or not text or not text.strip():
        return []

    question_generation_pipeline = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")
    # NOTE(review): this prefix looks like the one used by end-to-end QG
    # checkpoints; confirm it matches what "t5-base-qg-hl" was trained on.
    input_text = "generate questions: " + text

    # Beam search cannot return more sequences than it has beams, so the
    # beam width must be at least the number of requested questions.
    beam_width = max(num_beams, num_questions)
    questions = question_generation_pipeline(
        input_text,
        max_length=512,
        num_beams=beam_width,
        num_return_sequences=num_questions,
        # A whole PDF can far exceed the model's input window; truncate
        # instead of feeding an arbitrarily long sequence to the encoder.
        truncation=True,
    )
    return [q['generated_text'] for q in questions]
| |
|
def evaluate_responses(user_responses, correct_answers):
    """Score each user response against its reference answer.

    Both lists are embedded with ``paraphrase-MiniLM-L6-v2`` and compared
    pairwise (response *i* against answer *i*) with cosine similarity.

    Args:
        user_responses: List of answers typed by the user.
        correct_answers: List of reference answers, same length and order.

    Returns:
        List of float cosine similarities, one per response/answer pair;
        empty when both inputs are empty.

    Raises:
        ValueError: If the two lists do not have the same length.
    """
    # zip() would silently drop the unmatched tail and leave some answers
    # unscored, so a length mismatch is reported instead.
    if len(user_responses) != len(correct_answers):
        raise ValueError(
            f"Expected as many responses as reference answers, got "
            f"{len(user_responses)} and {len(correct_answers)}"
        )
    # Nothing to score: avoid loading the model and encoding an empty batch.
    if not user_responses:
        return []

    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    user_embeddings = model.encode(user_responses, convert_to_tensor=True)
    correct_embeddings = model.encode(correct_answers, convert_to_tensor=True)
    return [
        util.pytorch_cos_sim(user_emb, correct_emb).item()
        for user_emb, correct_emb in zip(user_embeddings, correct_embeddings)
    ]
| |
|
def generate_training_plan(scores, threshold=0.7):
    """Turn per-question scores into one recommendation line each.

    Args:
        scores: Iterable of numeric scores, in question order.
        threshold: Scores strictly below this value trigger a "review"
            recommendation; all others a "move on" recommendation.

    Returns:
        List of recommendation strings, one per score, numbered from 1.
    """
    def _advice(position, value):
        # Strictly-below comparison: a score equal to the threshold passes.
        if value < threshold:
            return f"Revoir la section correspondant à la question {position}"
        return f"Passer à l'étape suivante après la question {position}"

    return [_advice(position, value) for position, value in enumerate(scores, start=1)]
| |
|
# Main pipeline: runs only once a PDF has been uploaded.
# NOTE(review): Streamlit re-executes the script on widget interaction, so
# every step below (including model loading) is presumably repeated on each
# rerun — consider caching if this becomes too slow.
if uploaded_file is not None:
    # Save the upload to disk; the extraction step reads it back by path.
    file_path = save_uploaded_file(uploaded_file, "uploaded_documents")
    st.write(f"Fichier téléchargé et sauvegardé sous : {file_path}")

    document_text = extract_text_from_pdf(file_path)
    st.write("Texte extrait du document PDF:")
    # Show only a preview of the extracted text.
    st.write(document_text[:1000])

    # A PDF with no extractable text (e.g. scanned pages) gives nothing to
    # index or ask questions about: stop before the model-heavy steps.
    if not document_text.strip():
        st.warning("Aucun texte n'a pu être extrait de ce PDF.")
        st.stop()

    index_document(document_text)

    st.subheader("Questions générées")
    questions = generate_questions(document_text, num_questions=5)
    for idx, question in enumerate(questions, start=1):
        st.write(f"Question {idx}: {question}")

    st.subheader("Évaluer les réponses de l'utilisateur")
    # One answer field per generated question, rather than a fixed count.
    user_responses = [
        st.text_input(f"Réponse de l'utilisateur {idx}")
        for idx in range(1, len(questions) + 1)
    ]
    if st.button("Évaluer"):
        # Reference answers are extracted from the document itself instead
        # of being compared against fixed placeholder strings.
        correct_answers = [
            get_answer_from_document(question, document_text)["answer"]
            for question in questions
        ]
        scores = evaluate_responses(user_responses, correct_answers)
        for idx, score in enumerate(scores, start=1):
            st.write(f"Question {idx}: Score {score:.2f}")

        st.subheader("Plan de formation personnalisé")
        training_plan = generate_training_plan(scores)
        for step in training_plan:
            st.write(step)
| |
|
| |
|