anyonehomep1mane committed on
Commit
86ef765
·
0 Parent(s):

Initial Changes

Browse files
Files changed (4) hide show
  1. .gitignore +3 -0
  2. Dockerfile +25 -0
  3. app.py +136 -0
  4. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ venv
3
+ .vscode
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dockerfile (for custom HF Spaces deployment)
2
+ # Use: In HF Spaces, select "Docker" runtime and upload this Dockerfile.
3
+
4
+ FROM python:3.10-slim
5
+
6
+ # Set working directory
7
+ WORKDIR /app
8
+
9
+ # Install system dependencies (if needed for any libs)
10
+ RUN apt-get update && apt-get install -y \
11
+ libmagic1 \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Copy requirements and install
15
+ COPY requirements.txt .
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # Copy application code
19
+ COPY app.py .
20
+
21
+ # Expose Gradio port
22
+ EXPOSE 7860
23
+
24
+ # Run the app
25
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py (Main Gradio Application for HF Spaces)
# This is ready for Hugging Face Spaces deployment.
# Set HUGGINGFACEHUB_API_TOKEN as a Space secret (the exact name read below).

import os
import gradio as gr
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_core.prompts import ChatPromptTemplate
import PyPDF2
from docx import Document
from dotenv import load_dotenv

# Load variables from a local .env file for local development; on HF Spaces
# the secret arrives through the environment, so this is a harmless no-op.
load_dotenv()

# LLM Setup
# Fail fast at import time: the app cannot summarize anything without a token.
token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not token:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN not set. Please configure it in HF Spaces secrets.")

# Remote text-generation endpoint. Low temperature + top_p favor faithful,
# near-deterministic summaries; max_new_tokens caps summary length (roughly
# matching the 150-350 word target in the prompt below).
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-7B-Instruct",
    task="text-generation",
    temperature=0.3,
    top_p=0.9,
    max_new_tokens=400,
    huggingfacehub_api_token=token,
)

# Wrap the raw endpoint so it accepts chat-formatted (system/human) messages.
chat_model = ChatHuggingFace(llm=llm)

# Summarization Prompt
SUMMARIZE_PROMPT = ChatPromptTemplate.from_messages([
    ("system", """You are a highly capable document summarization assistant.
Write a clear, concise summary of the provided document.
Focus on the main ideas, key facts, arguments and conclusions.
Use neutral language. Avoid adding information not present in the text.
Aim for 150–350 words depending on document length."""),
    ("human", "{text}\n\nPlease provide a comprehensive yet concise summary."),
])

# Prompt piped into the chat model; invoked with {"text": ...} by
# summarize_document further down in this file.
summarize_chain = SUMMARIZE_PROMPT | chat_model
43
# File Extraction Function
def extract_text(file_path: str) -> str:
    """Return the plain text of a .txt/.pdf/.docx file, or an error string.

    Unsupported extensions and read failures are reported as strings
    (starting with "❌" / "Error") rather than raised, so the Gradio
    callback can surface them directly to the user.
    """
    extension = os.path.splitext(file_path)[1].lower()

    def _read_txt() -> str:
        with open(file_path, "r", encoding="utf-8") as handle:
            return handle.read().strip()

    def _read_pdf() -> str:
        # Pages with no extractable text yield empty strings, not None.
        pieces = []
        with open(file_path, "rb") as handle:
            for page in PyPDF2.PdfReader(handle).pages:
                pieces.append((page.extract_text() or "") + "\n")
        return "".join(pieces).strip()

    def _read_docx() -> str:
        document = Document(file_path)
        non_empty = (p.text for p in document.paragraphs if p.text.strip())
        return "\n".join(non_empty).strip()

    readers = {".txt": _read_txt, ".pdf": _read_pdf, ".docx": _read_docx}

    try:
        reader = readers.get(extension)
        if reader is None:
            return "❌ Supported formats: .txt, .pdf, .docx"
        return reader()
    except Exception as e:
        return f"Error reading file: {str(e)}"
70
+
71
# Summarization Function
def summarize_document(file):
    """Gradio callback: extract text from the uploaded file and summarize it.

    Returns a user-facing string in every case — the summary on success,
    or an explanatory message ("❌ ...", "Error ...") on failure.
    """
    if not file:
        return "Please upload a document."

    # Bug fix: gr.File(type="filepath") hands the callback a plain path
    # string, which has no .name attribute; older Gradio versions (and
    # type="file") pass a tempfile wrapper that does. Accept both.
    file_path = file if isinstance(file, str) else file.name

    text = extract_text(file_path)

    # extract_text signals failure via sentinel prefixes, not exceptions.
    if text.startswith("❌") or text.startswith("Error"):
        return text

    if len(text.strip()) < 80:
        return "Not enough meaningful text extracted."

    # Truncate long texts to avoid timeouts
    if len(text) > 18000:
        text = text[:18000]
        warning = "⚠️ Document truncated to ~18k characters for processing.\n\n"
    else:
        warning = ""

    try:
        response = summarize_chain.invoke({"text": text})
        summary = response.content.strip()
        return warning + summary if summary else "No summary generated."

    except Exception as e:
        # Map common Inference API failures to friendlier messages.
        err = str(e).lower()
        if "token" in err or "authorization" in err:
            return "❌ Hugging Face token invalid or missing."
        if "rate limit" in err:
            return "❌ Rate limit reached. Try later."
        return f"❌ Error: {str(e)}"
103
+
104
# Gradio Interface
# Wires the upload widget, trigger button and output box to
# summarize_document; launched directly when run as a script.
with gr.Blocks(title="Document Summarizer") as demo:
    gr.Markdown("# πŸ“„ Document Summarizer")
    gr.Markdown("Upload TXT, PDF, or DOCX and get an AI summary using Qwen2.5-7B-Instruct via Hugging Face.")

    # type="filepath" delivers the upload to the callback as a path string.
    uploaded_file = gr.File(label="Upload Document", file_types=[".txt", ".pdf", ".docx"], type="filepath")
    summarize_btn = gr.Button("Generate Summary", variant="primary")
    summary_box = gr.Textbox(label="Summary", lines=14, placeholder="Summary will appear here...")

    summarize_btn.click(fn=summarize_document, inputs=uploaded_file, outputs=summary_box)

    gr.Markdown("""
**Notes**:
- Powered by Hugging Face Inference API.
- Free tier has rate limits.
""")

if __name__ == "__main__":
    # 0.0.0.0:7860 is the address/port HF Spaces (and the Dockerfile) expect.
    demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ langchain
3
+ langchain_core
4
+ langchain-huggingface
5
+ PyPDF2
6
+ python-docx
7
+ python-dotenv