Spaces:
Running
Running
| # app.py (Main Gradio Application for HF Spaces) | |
| # This is ready for Hugging Face Spaces deployment. | |
# Set HUGGINGFACEHUB_API_TOKEN as a Space secret.
| import os | |
| import gradio as gr | |
| from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace | |
| from langchain_core.prompts import ChatPromptTemplate | |
| import PyPDF2 | |
| from docx import Document | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
# LLM Setup
# Read the Hugging Face API token from the environment (.env locally via
# load_dotenv above; Space secrets when deployed) and fail fast if absent.
token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not token:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN not set. Please configure it in HF Spaces secrets.")
# Remote text-generation endpoint. Low temperature / top_p keep the
# summaries focused; max_new_tokens caps the generated summary length.
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-7B-Instruct",
    task="text-generation",
    temperature=0.3,
    top_p=0.9,
    max_new_tokens=400,
    huggingfacehub_api_token=token,
)
# Chat wrapper so the endpoint can consume ChatPromptTemplate messages.
chat_model = ChatHuggingFace(llm=llm)
# Summarization Prompt
# System message fixes the summarizer's contract (neutral, grounded, length
# bounded); the human message injects the document via the {text} placeholder.
# NOTE: the length range previously read "150β350" (mojibake of an en dash),
# which garbled the instruction sent to the model — fixed to "150-350".
SUMMARIZE_PROMPT = ChatPromptTemplate.from_messages([
    ("system", """You are a highly capable document summarization assistant.
Write a clear, concise summary of the provided document.
Focus on the main ideas, key facts, arguments and conclusions.
Use neutral language. Avoid adding information not present in the text.
Aim for 150-350 words depending on document length."""),
    ("human", "{text}\n\nPlease provide a comprehensive yet concise summary."),
])
# LCEL pipeline: prompt -> chat model.
summarize_chain = SUMMARIZE_PROMPT | chat_model
# File Extraction Function
def extract_text(file_path: str) -> str:
    """Return the plain text content of a .txt, .pdf, or .docx file.

    Never raises: unsupported extensions and read failures are reported as
    human-readable strings (callers inspect the prefix to detect errors).
    """
    suffix = os.path.splitext(file_path)[1].lower()
    try:
        if suffix == ".txt":
            with open(file_path, "r", encoding="utf-8") as handle:
                return handle.read().strip()
        if suffix == ".pdf":
            with open(file_path, "rb") as handle:
                # extract_text() may return None for image-only pages.
                chunks = [(page.extract_text() or "") for page in PyPDF2.PdfReader(handle).pages]
            return "\n".join(chunks).strip()
        if suffix == ".docx":
            # Skip whitespace-only paragraphs; join the rest with newlines.
            paragraphs = Document(file_path).paragraphs
            return "\n".join(p.text for p in paragraphs if p.text.strip()).strip()
        return "β Supported formats: .txt, .pdf, .docx"
    except Exception as e:
        return f"Error reading file: {str(e)}"
# Summarization Function
def summarize_document(file):
    """Summarize an uploaded document and return the summary (or an error message).

    BUG FIX: the UI uses gr.File(type="filepath"), which passes a plain str
    path — the old code unconditionally read ``file.name`` and raised
    AttributeError on every upload. Now accepts either a str path or a
    file-like object exposing ``.name``.
    """
    if not file:
        return "Please upload a document."
    # type="filepath" -> str; legacy "file" mode -> tempfile wrapper with .name.
    path = file if isinstance(file, str) else file.name
    text = extract_text(path)
    # extract_text signals failure via string prefixes rather than raising.
    if text.startswith("β") or text.startswith("Error"):
        return text
    if len(text.strip()) < 80:
        return "Not enough meaningful text extracted."
    # Truncate long texts to avoid endpoint timeouts.
    if len(text) > 18000:
        text = text[:18000]
        warning = "β οΈ Document truncated to ~18k characters for processing.\n\n"
    else:
        warning = ""
    try:
        response = summarize_chain.invoke({"text": text})
        summary = response.content.strip()
        return warning + summary if summary else "No summary generated."
    except Exception as e:
        # Map common remote-API failures to friendlier messages.
        err = str(e).lower()
        if "token" in err or "authorization" in err:
            return "β Hugging Face token invalid or missing."
        if "rate limit" in err:
            return "β Rate limit reached. Try later."
        return f"β Error: {str(e)}"
# Gradio Interface
# Layout: title/description, upload widget, trigger button, summary box, notes.
with gr.Blocks(title="Document Summarizer") as demo:
    gr.Markdown("# π Document Summarizer")
    gr.Markdown("Upload TXT, PDF, or DOCX and get an AI summary using Qwen2.5-7B-Instruct via Hugging Face.")

    # "filepath" delivers the upload to the handler as a filesystem path.
    upload = gr.File(
        type="filepath",
        label="Upload Document",
        file_types=[".txt", ".pdf", ".docx"],
    )
    summarize_btn = gr.Button("Generate Summary", variant="primary")
    summary_box = gr.Textbox(
        placeholder="Summary will appear here...",
        label="Summary",
        lines=14,
    )

    # Wire the button to the summarizer callback.
    summarize_btn.click(fn=summarize_document, inputs=upload, outputs=summary_box)

    gr.Markdown("""
**Notes**:
- Powered by Hugging Face Inference API.
- Free tier has rate limits.
""")

if __name__ == "__main__":
    # Bind all interfaces; 7860 is the port HF Spaces routes to.
    demo.launch(server_name="0.0.0.0", server_port=7860)