doggabj committed on
Commit
6c87829
·
verified ·
1 Parent(s): c247325

Upload zero

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/AC_33_7-1.pdf filter=lfs diff=lfs merge=lfs -text
37
+ data/AC_33-2C.pdf filter=lfs diff=lfs merge=lfs -text
38
+ data/CFR-2024-title14-vol1-part33.pdf filter=lfs diff=lfs merge=lfs -text
39
+ data/CFR-2024-title14-vol1-part43.pdf filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,14 @@
1
  ---
2
- title: Faa Agentic Ai
3
- emoji: 🦀
4
- colorFrom: purple
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.32.0
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- short_description: This app uses semantic search over FAA PDF documents and the
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: FAA Engine Compliance Report Generator
3
+ emoji: ✈️
4
+ colorFrom: gray
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: "4.25.0"
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
+ # ✈️ FAA Engine Anomaly Compliance Report Generator
13
+
14
+ This app uses semantic search over FAA PDF documents and the Mistral-7B model (via Hugging Face Inference API) to generate FAA-compliant reports for engine anomalies.
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pathlib import Path
3
+ from refactored_mistral_demo_pdfs import main
4
+
5
# Streamlit documents set_page_config as needing to be the first Streamlit
# command on a page, so it runs before any other use of `st`.
st.set_page_config(page_title="FAA Report Generator", layout="centered")

# Load FAISS index once per session
if "faiss_index" not in st.session_state:
    # Imported lazily so the indexing module is only pulled in when the
    # session has no index yet.
    from vector_search import load_and_index_pdfs
    st.session_state["faiss_index"], _, st.session_state["chunks"] = load_and_index_pdfs("data")

st.title("✈️ FAA Engine Anomaly Report Generator")

anomaly_input = st.text_input("Enter engine anomaly (e.g., 'Oil temp exceeds 110°C')")

if st.button("Generate Report"):
    if not anomaly_input:
        st.warning("Please enter an anomaly.")
    else:
        # Must mirror the file name that main() derives from the same input.
        safe_name = "".join(c if c.isalnum() or c in (" ", "-") else "_" for c in anomaly_input)[:50]
        report_path = Path(f"{safe_name} Report.md")

        # Remove any report left over from an earlier run; otherwise a run
        # that fails validation would display the stale file as a success.
        report_path.unlink(missing_ok=True)

        st.info("Generating report... please wait ⏳")
        try:
            main(anomaly_input, st.session_state.faiss_index, st.session_state.chunks)
        except Exception as exc:  # top-level UI boundary: report instead of crashing
            st.error(f"❌ Report generation failed: {exc}")
        else:
            if report_path.exists():
                st.success(f"✅ Report generated: {report_path.name}")
                # Read with the same default encoding main() used to write it.
                st.markdown(report_path.read_text())
            else:
                st.error("❌ Report generation failed. Please try again.")
data/AC_33-2C.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2537d3f282c27c4e742422eac09ecb57889505f81da4c5401c55d541fc8e1fd0
3
+ size 809940
data/AC_33_7-1.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f98c78050680fe9d492ccb25acf37b2c6f68475610a92e60f7dd0050260df4c4
3
+ size 110485
data/CFR-2024-title14-vol1-part33.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac212f346a2f2208cc35040b8a2c9808fbe628151b92bb25f05321fc7109104
3
+ size 1078402
data/CFR-2024-title14-vol1-part43.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6a7376b6ffb3782fdcc71247ee1be711985a6c4c123678169d778939d79adde
3
+ size 307028
refactored_mistral_demo_pdfs.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ from pathlib import Path
5
+
6
def call_mistral(prompt, timeout=60):
    """Send ``prompt`` to the hosted Mistral-7B-Instruct model and return its text.

    Args:
        prompt: Full prompt text to submit as the ``inputs`` field.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``generated_text`` field of the first item in the API response.

    Raises:
        RuntimeError: If the API answers with a non-200 status, a body that
            is not JSON, or a payload without a ``generated_text`` entry.
        requests.RequestException: If the request itself fails or times out.
    """
    url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
    token = os.environ.get("HF_TOKEN", "")

    headers = {"Content-Type": "application/json"}
    if token:
        # Only send credentials when a token is configured; an empty
        # "Bearer " value is a malformed Authorization header.
        headers["Authorization"] = f"Bearer {token}"

    data = {
        "inputs": prompt,
        "parameters": {
            "temperature": 0.5,
            "max_new_tokens": 512,
            # Ask for the completion only. With the prompt echoed back, the
            # prompt's own "CFR" / "actionable" wording would end up in the
            # text that later checks treat as the report.
            "return_full_text": False,
        },
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    if response.status_code != 200:
        raise RuntimeError(f"Hugging Face API error ({response.status_code}): {response.text}")

    try:
        result = response.json()
    except ValueError as err:
        raise RuntimeError(f"Hugging Face API returned a non-JSON body: {response.text}") from err

    # Guard against an empty list or a non-dict first element before indexing.
    first = result[0] if isinstance(result, list) and result else None
    if isinstance(first, dict) and "generated_text" in first:
        return first["generated_text"]
    raise RuntimeError(f"Unexpected API output: {result!r}")
33
+
34
def write_report(anomaly_description: str, plan_info: str) -> str:
    """Ask the model for a Markdown FAA maintenance report.

    Args:
        anomaly_description: Anomaly text placed on the "Anomaly:" line.
        plan_info: Guidance text placed on the "Regulatory Guidance:" line.

    Returns:
        Whatever ``call_mistral`` returns for the assembled prompt.
    """
    return call_mistral(
        f"""You are a compliance report assistant. Your task is to generate an FAA-compliant maintenance report.

Anomaly: {anomaly_description}
Regulatory Guidance: {plan_info}

Requirements:
- Include an FAA regulation reference (e.g., CFR 43.13)
- Recommend actionable steps
- Output format: Markdown
"""
    )
46
+
47
def validate_report(report: str) -> str:
    """Classify a report as "Pass" or "Fail" using two substring checks.

    A report passes only when it contains "CFR" (case-sensitive) and
    "action" (case-insensitive); anything else fails.
    """
    if "CFR" not in report:
        return "Fail"
    if "action" not in report.lower():
        return "Fail"
    return "Pass"
49
+
50
def clean_report(report: str) -> str:
    """Drop any text before the first "#" or "FAA Report" and strip whitespace.

    When neither marker occurs, the whole report is returned stripped.
    """
    # The trailing "##" alternative never wins: "#" already matches at any
    # position where "##" would.
    marker = re.search(r"(#|FAA Report|##)", report)
    if marker is None:
        return report.strip()
    return report[marker.start():].strip()
53
+
54
def main(user_input, faiss_index, chunks):
    """Generate a report for ``user_input`` and save it when it validates.

    Looks up guidance passages via ``query_guidance``, asks the model for a
    report, cleans it, and validates it. A passing report is written to
    "<sanitised input> Report.md" in the current directory; a failing one
    only prints a message.

    Args:
        user_input: Anomaly description; also the basis of the file name.
        faiss_index: Index handed through to ``query_guidance``.
        chunks: Chunk texts handed through to ``query_guidance``.
    """
    from vector_search import query_guidance

    guidance_passages = query_guidance(user_input, faiss_index, None, chunks)
    draft = write_report(user_input, "\n\n".join(guidance_passages))
    final_report = clean_report(draft)

    if validate_report(final_report) != "Pass":
        print("❌ Validation failed.")
        return

    # Keep word characters, hyphens and spaces; cap the stem at 50 characters.
    file_stem = re.sub(r"[^\w\- ]", "_", user_input)[:50]
    Path(f"{file_stem} Report.md").write_text(final_report)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ huggingface_hub
3
+ requests
4
+ pypdf
5
+ faiss-cpu
6
+ sentence-transformers
vector_search.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from pathlib import Path
4
+ from typing import List, Tuple
5
+
6
+ import faiss
7
+ import numpy as np
8
+ from pypdf import PdfReader
9
+ from sentence_transformers import SentenceTransformer
10
+
11
+
12
# Paths
# DATA_DIR is the folder the cached index artifacts are written to and read from.
DATA_DIR = Path("data")
INDEX_FILE = DATA_DIR / "faa_index.faiss"  # serialized FAISS index
CHUNKS_FILE = DATA_DIR / "faa_chunks.json"  # JSON list of the chunk texts

# Model (load once)
# Constructed at import time, so importing this module instantiates the model.
MODEL = SentenceTransformer("all-MiniLM-L6-v2")
19
+
20
+
21
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page in the PDF, joined with newlines.

    A page whose ``extract_text()`` result is falsy contributes an empty
    string, so the number of joined segments always equals the page count.
    """
    page_texts = []
    for page in PdfReader(pdf_path).pages:
        page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)
24
+
25
+
26
def chunk_text(text: str, chunk_size: int = 500) -> List[str]:
    """Split ``text`` into consecutive chunks of whitespace-separated words.

    Args:
        text: Text to split; words are whatever ``str.split()`` yields.
        chunk_size: Maximum number of words per chunk.

    Returns:
        The chunks in order, each with its words re-joined by single spaces.
        The last chunk may hold fewer than ``chunk_size`` words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(" ".join(window))
    return pieces
29
+
30
+
31
def embed_chunks(chunks: List[str]) -> np.ndarray:
    """Encode ``chunks`` with the module-level ``MODEL``, showing a progress bar."""
    vectors = MODEL.encode(chunks, show_progress_bar=True)
    return vectors
33
+
34
+
35
def save_faiss_index(index: faiss.IndexFlatL2, embeddings: np.ndarray, chunks: List[str]):
    """Write the index to ``INDEX_FILE`` and the chunk list to ``CHUNKS_FILE``.

    Args:
        index: FAISS index to serialize.
        embeddings: Accepted for signature symmetry; not used here.
        chunks: Chunk texts, stored as a UTF-8 JSON list.
    """
    index_path = str(INDEX_FILE)
    faiss.write_index(index, index_path)
    CHUNKS_FILE.write_text(json.dumps(chunks), encoding="utf-8")
    print("💾 Saved FAISS index and chunk metadata.")
40
+
41
+
42
def load_faiss_index() -> Tuple[faiss.IndexFlatL2, np.ndarray, List[str]]:
    """Read the cached index and chunk list back from disk.

    Returns:
        ``(index, None, chunks)``. The middle slot is ``None`` because the
        original embeddings are not stored alongside the index.
    """
    stored_index = faiss.read_index(str(INDEX_FILE))
    stored_chunks = json.loads(CHUNKS_FILE.read_text(encoding="utf-8"))
    print("🔁 Loaded FAISS index and chunks.")
    return stored_index, None, stored_chunks
48
+
49
+
50
def build_faiss_index(chunks: List[str]) -> Tuple[faiss.IndexFlatL2, np.ndarray, List[str]]:
    """Embed ``chunks`` and load the vectors into a new ``IndexFlatL2``.

    Args:
        chunks: Chunk texts to embed; must not be empty.

    Returns:
        ``(index, embeddings, chunks)`` where ``chunks`` is the list passed in.

    Raises:
        ValueError: If ``chunks`` is empty. Without this guard the failure
            would surface as an IndexError on ``embeddings.shape[1]``.
    """
    if not chunks:
        raise ValueError("Cannot build a FAISS index from an empty chunk list.")

    embeddings = embed_chunks(chunks)
    # The index dimensionality is taken from the embedding width.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings, chunks
55
+
56
+
57
def load_and_index_pdfs(pdf_folder: str = "data") -> Tuple[faiss.IndexFlatL2, np.ndarray, List[str]]:
    """Return a FAISS index over the PDFs in ``pdf_folder``, using the cache if present.

    When both ``INDEX_FILE`` and ``CHUNKS_FILE`` exist they are loaded and
    returned as-is. Otherwise every ``*.pdf`` directly inside ``pdf_folder``
    is read, chunked and embedded, and the result is saved to those files.

    Args:
        pdf_folder: Directory to scan for PDF files.

    Returns:
        ``(index, embeddings, chunks)``; ``embeddings`` is ``None`` when the
        index came from the on-disk cache.

    Raises:
        ValueError: If no text could be extracted from any PDF in the folder.
    """
    if INDEX_FILE.exists() and CHUNKS_FILE.exists():
        return load_faiss_index()

    all_chunks = []
    # Sorted because glob order is arbitrary; this keeps chunk positions in
    # the saved index reproducible across runs and machines.
    for pdf_path in sorted(Path(pdf_folder).glob("*.pdf")):
        print(f"📄 Processing {pdf_path.name}")
        raw_text = extract_text_from_pdf(str(pdf_path))
        all_chunks.extend(chunk_text(raw_text))

    if not all_chunks:
        # Fail here with a clear message instead of deep inside index construction.
        raise ValueError(f"No extractable PDF text found in '{pdf_folder}'; cannot build the index.")

    index, embeddings, chunks = build_faiss_index(all_chunks)
    save_faiss_index(index, embeddings, chunks)
    return index, embeddings, chunks
72
+
73
+
74
def query_guidance(query: str, index: faiss.IndexFlatL2, _, chunks: List[str], top_k: int = 3) -> List[str]:
    """Return the chunk texts nearest to ``query`` in the index.

    Args:
        query: Free-text query, embedded with the module-level ``MODEL``.
        index: FAISS index to search.
        _: Unused placeholder kept for call compatibility.
        chunks: Chunk texts, looked up by the positions the search returns.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk texts in the order the search returned them.
        Empty when ``chunks`` is empty or ``top_k`` is not positive.
    """
    if top_k <= 0 or not chunks:
        return []

    query_vec = MODEL.encode([query])
    _distances, indices = index.search(query_vec, top_k)
    # FAISS fills missing neighbours with -1 when the index holds fewer than
    # top_k vectors; unfiltered, -1 would silently select chunks[-1].
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]