Spaces:

jaczad
/

KoREKtor

Paused

App Files Files Community

Jacek Zadrożny committed on Apr 14, 2025

Commit

344016f

1 Parent(s): 0f360ca

Alternatywny interface

Browse files

Files changed (2) hide show

README.md +1 -1
app2.py +168 -0

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ colorFrom: yellow
 colorTo: green
 sdk: gradio
 sdk_version: 5.25.0
-app_file: app.py
 pinned: true
 license: cc-by-4.0
 thumbnail: >-

 colorTo: green
 sdk: gradio
 sdk_version: 5.25.0
+app_file: app2.py
 pinned: true
 license: cc-by-4.0
 thumbnail: >-

app2.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import gradio as gr
+import pandas as pd
+from langchain_core.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+from langchain_core.output_parsers import StrOutputParser
+from pydantic import BaseModel, Field, field_validator
+from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
+from langchain.output_parsers import PydanticOutputParser
+from docx import Document
+from datetime import datetime
+import os
+import tempfile
+# Model danych
+class QuestionAnswer(BaseModel):
+    question_number: int = Field(..., description="Numer pytania")
+    answer: str = Field(..., description="Odpowiedź, tylko TAK lub NIE")
+    citation: str = Field(..., description="Fragment cytatu")
+    @field_validator("answer")
+    def validate_answer(cls, v):
+        if v not in {"TAK", "NIE"}:
+            raise ValueError("Odpowiedź musi być TAK lub NIE")
+        return v
+# Top-level shape of the parsed LLM response: a list of per-question answers.
+class JobAdAnalysis(BaseModel):
+    answers: list[QuestionAnswer]
+# Output parser that turns the raw model reply into a JobAdAnalysis instance.
+parser = PydanticOutputParser(pydantic_object=JobAdAnalysis)
+# Load the question matrix. The CSV is read without a header row, so the
+# column names are supplied explicitly here.
+matryca_df = pd.read_csv('matryca.csv', header=None,
+                         names=['area', 'prompt', 'true', 'false', 'more', 'hint'])
+# Maps question number -> matrix fields; (re)built by prepare_questions().
+question_to_area_map = {}
+def prepare_questions(df):
+    global question_to_area_map
+    question_to_area_map = {}
+    questions_text = ""
+    for index, row in df.iterrows():
+        question_number = index + 1
+        questions_text += f"{question_number} {row['prompt']}\n"
+        question_to_area_map[question_number] = {
+            'area': row['area'],
+            'true': row['true'],
+            'false': row['false'],
+            'hint': row['hint'],
+            'more': row['more']
+        }
+    return questions_text
+def doc_to_text(file):
+    extension = os.path.splitext(file.name)[1].lower()
+    if extension == ".docx":
+        loader = Docx2txtLoader(file.name)
+    elif extension == ".pdf":
+        loader = PyPDFLoader(file.name)
+    else:
+        return "error"
+    pages = loader.load()
+    return "\n".join(page.page_content for page in pages)
+def create_html_inline(result: pd.DataFrame) -> str:
+    html = "<h1>Raport analizy ogłoszenia o pracę</h1>"
+    html += f"<p><strong>Data wygenerowania:</strong> {datetime.now().strftime('%d.%m.%Y %H:%M')}</p>"
+    for _, row in result.iterrows():
+        html += f"<h2>{row['area']}</h2>"
+        html += f"<blockquote>{row['citation']}</blockquote>"
+        for line in str(row['content']).split('\n'):
+            if line.strip():
+                html += f"<p>{line}</p>"
+        if pd.notna(row['more']):
+            html += "<details><summary>Dodatkowe informacje</summary>"
+            for line in str(row['more']).split('\n'):
+                if line.strip():
+                    html += f"<p>{line}</p>"
+            html += "</details>"
+    return html
+def create_report(result: pd.DataFrame) -> str:
+    doc = Document('template.docx')
+    doc.add_heading('Raport analizy ogłoszenia o pracę', 0)
+    doc.add_paragraph(f'Data wygenerowania: {datetime.now().strftime("%d.%m.%Y %H:%M")}')
+    for _, row in result.iterrows():
+        doc.add_heading(str(row['area']), 1)
+        doc.add_paragraph(str(row['citation']), style='Intense Quote')
+        for line in str(row['content']).split('\n'):
+            if line.strip():
+                doc.add_paragraph(line)
+        if pd.notna(row['more']):
+            for line in str(row['more']).split('\n'):
+                if line.strip():
+                    doc.add_paragraph(line)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
+        doc.save(tmp.name)
+        return tmp.name
+def analyze_job_ad(job_ad, file):
+    if file:
+        job_ad = doc_to_text(file)
+        if job_ad == "error":
+            return None, None
+    questions = prepare_questions(matryca_df)
+    prompt_template = PromptTemplate.from_template(
+        """Przeanalizuj poniższe ogłoszenie o pracę pod kątem dostępności dla osób z niepełnosprawnościami.
+        Ogłoszenie:
+        {job_ad}
+        Odpowiedz na następujące pytania:
+        {questions}
+        Format odpowiedzi powinien być w następującej strukturze JSON:
+        {{
+          "answers": [
+            {{
+              "question_number": 1,
+              "answer": "TAK/NIE",
+              "citation": "dokładny cytat z tekstu"
+            }}
+          ]
+        }}
+        """
+    )
+    model = ChatOpenAI(temperature=0, model="gpt-4o-mini")
+    chain = prompt_template | model | parser
+    response = chain.invoke({"job_ad": job_ad, "questions": questions})
+    output_df = pd.DataFrame(columns=['area', 'answer', 'citation', 'content', 'more'])
+    for i in range(16):
+        if response.answers[i].answer in {"TAK", "NIE"}:
+            new_row = {
+                'area': matryca_df.area[i],
+                'answer': response.answers[i].answer,
+                'citation': response.answers[i].citation,
+                'content': matryca_df.true[i] if response.answers[i].answer == 'TAK' else matryca_df.false[i],
+                'more': matryca_df.more[i]
+            }
+            output_df = pd.concat([output_df, pd.DataFrame([new_row])], ignore_index=True)
+    word_file_path = create_report(output_df)
+    html_output = create_html_inline(output_df)
+    return html_output, word_file_path
+# Gradio UI built with Blocks
+with gr.Blocks(title="KoREKtor – analiza ogłoszenia") as demo:
+    # Static header: logo, title, usage instructions and the sponsor banner.
+    # NOTE(review): the <img> tags use bare relative paths; confirm these
+    # files are actually served to the browser in this deployment.
+    gr.HTML("""
+    <div style='text-align: center;'>
+        <img src='logo-korektor.png' alt='KoREKtor' style='max-height: 80px;'>
+        <h1>KoREKtor</h1>
+    </div>
+    <p>Aplikacja KoREKtor wykorzystuje sztuczną inteligencję do analizowania ogłoszeń rekrutacyjnych pod kątem informacji dla osób z niepełnosprawnością. Możesz wkleić treść ogłoszenia do pola tekstowego lub przesłać plik w formacie PDF lub DOCX. Potem kliknij na przycisk <strong>Sprawdź</strong> i poczekaj kilkanaście sekund.</p>
+    <img src='belka.png' alt='Logotypy sponsorów' style='width: 100%; max-height: 60px;'>
+    """)
+    # Inputs side by side: pasted advertisement text or a single uploaded file.
+    with gr.Row():
+        job_ad_input = gr.TextArea(label="Ogłoszenie (opcjonalnie)")
+        file_input = gr.File(label="Plik PDF lub DOCX", file_count="single")
+    analyze_button = gr.Button("Sprawdź")
+    # Outputs: the inline HTML report and the downloadable Word report.
+    html_output = gr.HTML(label="Wyniki analizy")
+    word_output = gr.File(label="Pobierz raport w formacie Word")
+    # analyze_job_ad returns (html, docx_path), matching the two outputs in order.
+    analyze_button.click(analyze_job_ad, inputs=[job_ad_input, file_input], outputs=[html_output, word_output])
+# Start the app at import time.
+demo.launch(pwa=True, show_api=False, favicon_path="logo-korektor.png")