Added initial files including models and runtime

Browse files

Files changed (4) hide show

app.py +89 -0
pdf_model.pkl +3 -0
requirements.txt +0 -0
vectorizer.pkl +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+# Third-party dependencies: Gradio (web UI), joblib (model loading),
+# PyPDF2 (PDF text extraction) and NLTK (tokenising / stopwords).
+import gradio as gr
+import joblib
+import PyPDF2
+import nltk
+from collections import Counter
+# Fetch the NLTK resources at import time so the tokenizer and stopword list
+# used below are available on a fresh machine.
+# NOTE(review): "punkt_tab" appears to be the tokenizer data newer NLTK
+# releases look for, with "punkt" kept for older ones - confirm against the
+# pinned NLTK version.
+nltk.download("punkt")
+nltk.download("punkt_tab")
+nltk.download("stopwords")
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+# Load the classifier and its matching vectorizer once, at import time.
+# Paths are relative to the current working directory.
+# NOTE: joblib.load unpickles its input and can execute arbitrary code; only
+# load artifacts that come from a trusted source.
+model = joblib.load("pdf_model.pkl")
+vectorizer = joblib.load("vectorizer.pkl")
+def extract_text(file):
+    """Return the text of every page of a PDF as a single string.
+
+    Args:
+        file: The PDF source, passed straight to ``PyPDF2.PdfReader``.
+
+    Returns:
+        The text of all pages that yielded any, joined with newlines so the
+        last word of one page is not fused to the first word of the next.
+        An empty string when no page has extractable text.
+    """
+    reader = PyPDF2.PdfReader(file)
+    page_texts = (page.extract_text() for page in reader.pages)
+    # Pages with no extractable text give a falsy value; skip them.
+    return "\n".join(chunk for chunk in page_texts if chunk)
+def extract_keywords(text, top_n=5):
+    """Return the most frequent alphabetic, non-stopword tokens in *text*.
+
+    Args:
+        text: Raw document text; it is lower-cased before tokenising.
+        top_n: Maximum number of keywords to return (default 5).
+
+    Returns:
+        A list of up to ``top_n`` lower-case words, most frequent first.
+        Empty when *text* contains no qualifying token.
+    """
+    # Look the stopword list up once per call (not once per token) and keep
+    # it in a set so each membership test is O(1).
+    stop_words = set(stopwords.words("english"))
+    counts = Counter(
+        token
+        for token in word_tokenize(text.lower())
+        if token.isalpha() and token not in stop_words
+    )
+    return [word for word, _ in counts.most_common(top_n)]
+def summarize(text):
+    """Return the first three "."-delimited segments of *text*.
+
+    The segments are re-joined with "."; the period that ended the third
+    segment is not included. Text with fewer than three periods is returned
+    unchanged.
+    """
+    # Splitting at most three times yields the same leading three pieces.
+    leading = text.split(".", 3)[:3]
+    return ".".join(leading)
+def analyze_pdf(file):
+    """Classify an uploaded PDF and report its keywords and a short summary.
+
+    Args:
+        file: The value supplied by the ``gr.File`` input, or ``None`` when
+            nothing was uploaded.
+
+    Returns:
+        A multi-line report with the predicted category, the top keywords
+        and the leading sentences, or a short explanatory message when there
+        is nothing to analyze.
+    """
+    # Guard: the handler can be invoked without an upload.
+    if file is None:
+        return "Please upload a PDF file."
+    text = extract_text(file)
+    # Guard: image-only or empty PDFs yield no text, so there is nothing
+    # meaningful to classify or summarize.
+    if not text.strip():
+        return "No extractable text was found in this PDF."
+    keywords = extract_keywords(text)
+    summary = summarize(text)
+    features = vectorizer.transform([text])
+    pred = model.predict(features)[0]
+    category = {
+        0: "Finance / Banking Document",
+        1: "Technology / Cloud / Machine Learning"
+    }
+    # Fall back to a readable label instead of raising KeyError if the model
+    # ever predicts a class that is not listed above.
+    label = category.get(pred, f"Unknown (class {pred})")
+    return f"""
+Category: {label}
+Keywords: {", ".join(keywords)}
+Summary:
+{summary}
+"""
+# Web UI: a single file-upload input wired to analyze_pdf, with the report
+# shown as plain text.
+iface = gr.Interface(
+    analyze_pdf,
+    gr.File(),
+    "text",
+    title="AI PDF Analyzer",
+    description="Upload a PDF to analyze its content, keywords and summary.",
+)
+# Start the Gradio server when this module is executed.
+iface.launch()

pdf_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9a7f7ef6a86fc591f4552b7c665519fb134f573253c87d299e6da40ca8a6335
+size 991

requirements.txt ADDED Viewed

Binary file (102 Bytes). View file

vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f4bdc3a0a3119f554f85204bc0b07cecb902963980b428f35b3f77f7affdf4f
+size 1178