Spaces:

HF-Pawan
/

Automated-Signature-Analysis-Docker

Running

App Files Files Community

anyonehomep1mane committed on Jan 29

Commit

5637ddb

0 Parent(s):

Initial Changes

Browse files

Files changed (22) hide show

.gitattributes +1 -0
.gitignore +5 -0
Dockerfile +22 -0
RAG_Documents/Agreeableness - PDF Information.pdf +0 -0
RAG_Documents/Conscientiousness - PDF Information.pdf +0 -0
RAG_Documents/Extraversion - PDF Information.pdf +0 -0
RAG_Documents/Neuroticism - PDF Information.pdf +0 -0
RAG_Documents/Openness - PDF Information.pdf +0 -0
app.py +55 -0
base_Dockerfile +16 -0
base_README.md +10 -0
base_app.py +17 -0
base_requirements.txt +2 -0
common/gradio/common.py +76 -0
common/gradio/signature_model_tfdata.keras +3 -0
common/rag/common.py +86 -0
common/rag/document_loader.py +67 -0
common/rag/embeddings.py +91 -0
common/rag/models.py +51 -0
common/rag/prompts.py +0 -0
requirements.txt +215 -0
signature_model_tfdata.keras +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.keras filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.env
+venv
+handwriting_dataset
+heavy_handwriting_dataset
+.vscode

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+FROM python:3.10.13-slim
+# Runtime switches: no .pyc files, unbuffered stdout/stderr, GPUs hidden from
+# CUDA (CPU-only inference) and oneDNN optimizations turned off for TensorFlow.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    CUDA_VISIBLE_DEVICES=-1 \
+    TF_ENABLE_ONEDNN_OPTS=0
+WORKDIR /app
+# --no-install-recommends keeps optional packages out of the image; the apt
+# lists are removed in the same layer so they never reach the final image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Copy only the dependency list first so the pip layer is reused while
+# requirements.txt is unchanged.
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+COPY . .
+# app.py launches Gradio on 0.0.0.0:7860.
+EXPOSE 7860
+CMD ["python", "app.py"]

RAG_Documents/Agreeableness - PDF Information.pdf ADDED Viewed

Binary file (54.2 kB). View file

RAG_Documents/Conscientiousness - PDF Information.pdf ADDED Viewed

Binary file (52.5 kB). View file

RAG_Documents/Extraversion - PDF Information.pdf ADDED Viewed

Binary file (51.7 kB). View file

RAG_Documents/Neuroticism - PDF Information.pdf ADDED Viewed

Binary file (53 kB). View file

RAG_Documents/Openness - PDF Information.pdf ADDED Viewed

Binary file (51.7 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,55 @@

+# import sys
+# import tensorflow as tf
+# import gradio as gr
+# print("Python:", sys.version)
+# print("TF:", tf.__version__)
+# def hello():
+#     return "Gradio + TensorFlow OK"
+# gr.Interface(fn=hello, inputs=None, outputs="text").launch()
+# Gradio entry point: builds the upload/analyze UI and serves it on port 7860.
+import os
+# Remove TF_USE_LEGACY_KERAS (if set) before common.gradio.common imports TensorFlow.
+# NOTE(review): presumably this keeps the legacy tf_keras path from being selected - confirm.
+os.environ.pop("TF_USE_LEGACY_KERAS", None)
+import gradio as gr
+from common.gradio.common import full_analysis
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Handwriting → Big Five Personality Prediction")
+    gr.Markdown("Upload any image of handwriting → model will try to predict personality trait")
+    # Two columns: image upload on the left, prediction and description on the right.
+    with gr.Row():
+        with gr.Column():
+            # type="pil" means full_analysis receives a PIL image (or None when cleared).
+            image_input = gr.Image(
+                type="pil",
+                label="Upload handwriting image",
+                sources=["upload"],
+                height=380
+            )
+        with gr.Column():
+            gr.Markdown("### Prediction")
+            prediction_output = gr.Markdown(value="Upload image and click Analyze...")
+            gr.Markdown("### Personality Description")
+            summary_output = gr.Markdown(value="Description will appear here...")
+    btn = gr.Button("Analyze", variant="primary")
+    # The button click and any change of the image both run the same callback,
+    # which fills the two Markdown outputs.
+    btn.click(
+        fn=full_analysis,
+        inputs=image_input,
+        outputs=[prediction_output, summary_output]
+    )
+    image_input.change(
+        fn=full_analysis,
+        inputs=image_input,
+        outputs=[prediction_output, summary_output]
+    )
+if __name__ == "__main__":
+    # Listen on all interfaces, port 7860.
+    demo.launch(server_name="0.0.0.0", server_port=7860)

base_Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.10.13-slim
+# Create an unprivileged user with UID 1000 and run all following steps as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's ~/.local/bin on PATH (presumably where pip places scripts for this non-root user).
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# Install dependencies before copying the rest of the sources.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the `app` object from the `app` module with uvicorn on port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

base_README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+title: Automated Signature Analysis Docker
+emoji: 📉
+colorFrom: blue
+colorTo: red
+sdk: docker
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

base_app.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import sys
+import platform
+from fastapi import FastAPI
+app = FastAPI()
+@app.get("/")
+def greet_json():
+    return {"Hello": "World!"}
+@app.get("/version")
+def get_version():
+    return {
+        "python_version": sys.version.split()[0],
+        "platform": platform.platform(),
+        "full_version": sys.version
+    }

base_requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ fastapi
2	+ uvicorn[standard]

common/gradio/common.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import numpy as np
+import tensorflow as tf
+import warnings
+from common.rag.common import generate_personality_summary
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+_model = None
+def predict_handwriting(image):
+    """
+    Preprocess uploaded image exactly the way model expects
+    """
+    global _model
+    if _model is None:
+        _model = tf.keras.models.load_model(
+            "signature_model_tfdata.keras",
+            compile=False
+        )
+    if image is None:
+        return "Please upload an image.", ""
+    try:
+        img = tf.keras.preprocessing.image.img_to_array(image)
+        if img.shape[-1] == 4:
+            img = img[..., :3]
+        elif img.shape[-1] == 1:
+            pass
+        elif img.shape[-1] != 3:
+            return "Unsupported image format (channels).", ""
+        if img.shape[-1] == 3:
+            img = tf.image.rgb_to_grayscale(img)
+        IMG_SIZE = 224
+        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
+        img = img / 255.0
+        img = tf.image.grayscale_to_rgb(img)
+        img = tf.expand_dims(img, axis=0)
+        predictions = _model.predict(img, verbose=0)[0]
+        predicted_idx = np.argmax(predictions)
+        confidence = float(predictions[predicted_idx]) * 100
+        CLASS_NAMES = [
+            "Agreeableness",
+            "Conscientiousness",
+            "Extraversion",
+            "Neuroticism",
+            "Openness"
+        ]
+        trait = CLASS_NAMES[predicted_idx]
+        result = f"**Predicted Personality Trait**\n{trait}\n\n**Confidence**: {confidence:.2f}%", trait
+        return result
+    except Exception as e:
+        import traceback
+        return f"Error during prediction:\n{str(e)}", ""
+def full_analysis(image):
+    if image is None:
+        return "Please upload an image.", ""
+    prediction_text, trait = predict_handwriting(image)
+    summary = generate_personality_summary(trait) if trait else ""
+    return prediction_text, summary

common/gradio/signature_model_tfdata.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02a50f5ec38a218287eb03e2f70a468237ff0a998110175f65e87f676fca0d41
+size 23635350

common/rag/common.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from common.rag.embeddings import fetch_relevant_document
+from common.rag.models import load_huggingface_model
+def generate_personality_summary(trait):
+    """
+    Generate a graphological personality summary for a given trait/topic using RAG.
+    This function performs a Retrieval-Augmented Generation (RAG) query to analyze
+    handwriting characteristics and derive corresponding personality traits/psychological
+    interpretations — but **only** from information explicitly present in retrieved documents.
+    Important:
+        The analysis is strictly limited to the content found in the vector store.
+        No external/pre-trained graphological knowledge is used by the LLM.
+    Parameters
+    ----------
+    trait : str or None
+        The personality trait, psychological characteristic, behavioral pattern or
+        topic for which handwriting analysis should be retrieved and interpreted.
+        Examples: "introversion", "high ambition", "emotional instability", "leadership"
+    Returns
+    -------
+    str
+        Graphological analysis containing:
+        - observed handwriting features (if any were found)
+        - their professional graphological interpretation
+        - overall personality impression
+        OR one of the following safety messages:
+        - "The provided context contains insufficient information for handwriting analysis"
+        - empty string (when trait is None)
+    Notes
+    -----
+    - The function is intentionally very strict about hallucination prevention.
+    - Quality of the result depends heavily on the relevance and richness of documents
+      stored in the vector database for the given trait.
+    """
+    if trait is None:
+        return ""
+    system_message = """
+    You are a highly experienced professional graphologist with a PhD in Graphology and more than 20 years of practical experience in forensic and psychological handwriting analysis.
+    Your only task is to analyze handwriting features and give interpretations STRICTLY based on the information provided in the retrieved context/transcript.
+    Rules you must follow:
+    • Never use knowledge or assumptions from your training data
+    • Never invent or assume handwriting characteristics that are not explicitly described in the provided context
+    • If the context contains insufficient information for a meaningful analysis → answer only: "The provided context contains insufficient information for handwriting analysis"
+    • Use professional graphological terminology
+    • Structure your answer clearly: first describe observed features, then psychological/personality interpretation (if enough data)
+    Be objective, precise, and stay 100% within the provided context.
+    """
+    question = f"Analyze the handwriting features and personality traits of a person characterized as: {trait}, using ONLY the information present in the provided context."
+    context = fetch_relevant_document(topic=trait)
+    model = load_huggingface_model()
+    rag_prompt = ChatPromptTemplate.from_messages([
+        ("system", "{system_message}"),
+        ("human", """Context information:\n\n{context}\n\nQuestion:\n\n{question}\n\nTopic:{topic}\n\nAnswer:""")
+    ])
+    simple_rag_chain = (
+        rag_prompt
+        | model
+        | StrOutputParser()
+    )
+    answer = simple_rag_chain.invoke({
+        "system_message": system_message,
+        "context": context,
+        "question": question,
+        "topic": trait
+    })
+    return answer

common/rag/document_loader.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import warnings
+warnings.filterwarnings(action='ignore')
+from langchain_community.document_loaders import PyPDFDirectoryLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from dotenv import load_dotenv
+load_dotenv()
+def fetch_document_chunks():
+    """
+    Load and split all PDF files from the designated folder into manageable text chunks.
+    This function serves as the document ingestion step for the RAG pipeline.
+    It:
+    - Loads every PDF file found in the ./RAG_Documents directory
+    - Splits documents into overlapping chunks optimized for vector embedding
+      and retrieval in graphology/handwriting analysis context
+    Configuration (hardcoded):
+    - Source folder: ./RAG_Documents
+    - Chunk size: 850 characters
+    - Chunk overlap: 120 characters
+    - Splitter: RecursiveCharacterTextSplitter with common separators
+    - Includes start_index metadata for potential future reference/traceability
+    Returns
+    -------
+    list[langchain_core.documents.Document]
+        List of document chunks ready to be embedded and stored in vector database.
+        Each chunk contains:
+        - page_content: the text fragment
+        - metadata: source file, page number, start_index
+    Raises
+    ------
+    FileNotFoundError
+        If the ./RAG_Documents directory does not exist
+    ValueError
+        If no PDF files are found or directory is empty
+    Notes
+    -----
+    - This function loads and splits documents **every time it is called**.
+    - In production, consider caching the chunks or using a persistent vector store
+      to avoid repeated disk I/O and splitting.
+    - Current parameters (850/120) are reasonable for most sentence-transformers
+      models and graphology-related documents.
+    """
+    PDF_FOLDER = "./RAG_Documents"
+    CHUNK_SIZE = 850
+    CHUNK_OVERLAP = 120
+    loader = PyPDFDirectoryLoader(PDF_FOLDER)
+    docs = loader.load()
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=CHUNK_SIZE,
+        chunk_overlap=CHUNK_OVERLAP,
+        length_function=len,
+        separators=["\n\n", "\n", ". ", " ", ""],
+        add_start_index=True
+    )
+    chunks = text_splitter.split_documents(docs)
+    return chunks

common/rag/embeddings.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import warnings
+warnings.filterwarnings(action='ignore')
+import torch
+from langchain_community.vectorstores import FAISS
+from langchain_huggingface import HuggingFaceEmbeddings
+from common.rag.document_loader import fetch_document_chunks
+from dotenv import load_dotenv
+load_dotenv()
+def fetch_vectorstore_retriever():
+    """
+    Create and return a FAISS-based retriever for graphology/handwriting analysis documents.
+    This function:
+    - Loads sentence-transformers/all-MiniLM-L6-v2 embeddings (GPU if available)
+    - Builds a FAISS vector store from document chunks obtained via fetch_document_chunks()
+    - Returns a similarity search retriever configured to return top 10 most relevant chunks
+    Returns
+    -------
+    langchain_core.retrievers.BaseRetriever
+        Configured FAISS retriever ready to be used with .invoke() or .get_relevant_documents()
+    Notes
+    -----
+    - The vector store is **recreated from scratch every time** this function is called.
+    - This can be slow on first run or when document collection is large.
+    - Consider caching/persisting the vectorstore in production for better performance.
+    - Uses normalize_embeddings=True → cosine similarity is used internally.
+    """
+    embeddings = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-MiniLM-L6-v2",
+        model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'},
+        encode_kwargs={'normalize_embeddings': True}
+    )
+    vectorstore = FAISS.from_documents(
+        documents=fetch_document_chunks(),
+        embedding=embeddings
+    )
+    retriever = vectorstore.as_retriever(
+        search_type="similarity",
+        search_kwargs={"k": 10}
+    )
+    return retriever
+def fetch_relevant_document(topic="None"):
+    """
+    Retrieve relevant document chunks for graphological analysis of a specific topic/trait.
+    Constructs a detailed, structured query optimized for finding handwriting analysis content,
+    then retrieves the top 10 most similar document chunks from the FAISS vector store.
+    Parameters
+    ----------
+    topic : str, default="None"
+        Personality trait, psychological characteristic, writing style aspect or any topic
+        for which handwriting analysis information is requested.
+        Examples: "ambition", "emotional stability", "aggressiveness", "introversion"
+    Returns
+    -------
+    str
+        Concatenated string containing up to 10 relevant document chunks, each prefixed
+        with "[Document N]" for clear identification in the RAG context.
+        Returns empty context string if topic is "None" or no relevant chunks are found.
+    Notes
+    -----
+    - The query is intentionally very specific and structured to improve retrieval quality
+      for handwriting/graphology related content.
+    - Uses similarity (cosine) search with k=10 (top 10 results).
+    - The returned context is meant to be directly passed into a RAG prompt for LLM analysis.
+    """
+    retriever = fetch_vectorstore_retriever()
+    query = (
+        f"Handwriting sample analysis for: {topic}\n"
+        "Extract and summarize: \n"
+        "- Observed writing style characteristics (slant, pressure, size, speed, spacing, margins, baseline, letter forms, connections, etc.)\n"
+        "- Graphological interpretations of personality traits linked to those features\n"
+        "- Overall psychological or personality impression"
+    )
+    docs = retriever.invoke(query)
+    context = "\n\n".join(f"[Document {i+1}]\n{doc.page_content}\n" for i, doc in enumerate(docs))
+    return context

common/rag/models.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import warnings
+warnings.filterwarnings(action='ignore')
+from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
+from dotenv import load_dotenv
+load_dotenv()
+def load_huggingface_model():
+    """
+    Initialize and return a Hugging Face chat model wrapper for RAG-based graphology analysis.
+    Creates a HuggingFaceEndpoint instance connected to the Qwen2.5-7B-Instruct model
+    and wraps it with ChatHuggingFace for conversational compatibility with LangChain.
+    Configuration:
+    - Model: Qwen/Qwen2.5-7B-Instruct (7B parameter instruction-tuned model)
+    - Temperature: 0.65 (balanced between creativity and coherence)
+    - Max new tokens: 1024
+    - Top-p: 0.92 (nucleus sampling)
+    - Repetition penalty: 1.05 (light discouragement of repetitions)
+    Returns
+    -------
+    ChatHuggingFace
+        Configured LangChain-compatible chat model ready to be used in chains
+    Notes
+    -----
+    - Requires HUGGINGFACEHUB_API_TOKEN to be set in environment variables
+      (loaded via dotenv)
+    - Uses inference endpoint (cloud-based inference) — no local GPU/CPU loading
+    - Model is reloaded every time this function is called
+    - Current settings are optimized for structured, precise graphological analysis
+      with controlled creativity
+    - Consider adjusting temperature/max_new_tokens based on response length needs
+    Raises
+    ------
+    ValueError
+        If HUGGINGFACEHUB_API_TOKEN is missing or invalid
+    """
+    chat_llm = HuggingFaceEndpoint(
+        repo_id="Qwen/Qwen2.5-7B-Instruct",
+        task="text-generation",
+        temperature=0.65,
+        max_new_tokens=1024,
+        top_p=0.92,
+        repetition_penalty=1.05
+    )
+    model = ChatHuggingFace(llm=chat_llm)
+    return model

common/rag/prompts.py ADDED Viewed

File without changes

requirements.txt ADDED Viewed

	@@ -0,0 +1,215 @@

+absl-py
+aiofiles
+aiohappyeyeballs
+aiohttp
+aiosignal
+altair
+annotated-doc
+annotated-types
+anyio
+asttokens
+astunparse
+async-timeout
+attrs
+blinker
+brotli
+cachetools
+certifi
+charset-normalizer
+click
+cloudpickle
+colorama
+comm
+contourpy
+cycler
+dataclasses-json
+debugpy
+decorator
+distro
+exceptiongroup
+executing
+faiss-cpu
+fastapi
+ffmpy
+filelock
+filetype
+flatbuffers
+fonttools
+frozenlist
+fsspec
+gast
+gitdb
+GitPython
+google-auth
+google-auth-oauthlib
+google-genai
+google-pasta
+# gradio
+gradio_client
+greenlet
+groovy
+grpcio
+h11
+h5py
+hf-xet
+httpcore
+httpx
+httpx-sse
+huggingface-hub
+idna
+ImageIO
+ipykernel
+ipython
+jedi
+Jinja2
+jiter
+joblib
+jsonpatch
+jsonpointer
+jsonschema
+jsonschema-specifications
+jupyter_client
+jupyter_core
+keras
+kiwisolver
+langchain
+langchain-classic
+langchain-community
+langchain-core
+langchain-google-genai
+langchain-huggingface
+langchain-openai
+langchain-text-splitters
+langgraph
+langgraph-checkpoint
+langgraph-prebuilt
+langgraph-sdk
+langsmith
+lazy_loader
+libclang
+lime
+llvmlite
+Markdown
+markdown-it-py
+MarkupSafe
+marshmallow
+matplotlib
+matplotlib-inline
+mdurl
+ml_dtypes
+mpmath
+multidict
+mypy_extensions
+namex
+narwhals
+nest-asyncio
+networkx
+numba
+# numpy
+oauthlib
+openai
+opencv-python
+opt_einsum
+optree
+orjson
+ormsgpack
+packaging
+pandas
+parso
+pillow
+platformdirs
+prompt_toolkit
+propcache
+# protobuf
+psutil
+pure_eval
+pyarrow
+pyasn1
+pyasn1_modules
+pydantic
+pydantic-settings
+pydantic_core
+pydeck
+pydub
+Pygments
+pyparsing
+pypdf
+python-dateutil
+python-dotenv
+python-multipart
+pytz
+PyYAML
+pyzmq
+referencing
+regex
+reportlab
+requests
+requests-oauthlib
+requests-toolbelt
+rich
+rpds-py
+rsa
+safehttpx
+safetensors
+scikit-image
+scikit-learn
+scipy
+seaborn
+semantic-version
+sentence-transformers
+shap
+shellingham
+six
+slicer
+smmap
+sniffio
+SQLAlchemy
+stack-data
+starlette
+streamlit
+sympy
+tenacity
+tensorboard
+tensorboard-data-server
+# tensorflow
+tensorflow-estimator
+tensorflow-intel
+# tensorflow-io-gcs-filesystem
+termcolor
+tf_keras
+threadpoolctl
+tifffile
+tiktoken
+tokenizers
+toml
+tomlkit
+tornado
+# torch==2.7.1+cu118
+# torchvision==0.22.1+cu118
+# torchaudio==2.7.1+cu118
+tqdm
+traitlets
+transformers
+typer
+typer-slim
+typing-inspect
+typing-inspection
+typing_extensions
+tzdata
+urllib3
+uuid_utils
+uvicorn
+watchdog
+wcwidth
+websockets
+Werkzeug
+wrapt
+xxhash
+yarl
+zstandard
+tensorflow==2.15.1
+tf-keras==2.15.1
+numpy<2
+protobuf<4
+gradio>=4.0,<5.0

signature_model_tfdata.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02a50f5ec38a218287eb03e2f70a468237ff0a998110175f65e87f676fca0d41
+size 23635350