"""DocuMind: a Gradio app for document parsing, summarising, rewriting, Q&A and image analysis.

Documents (PDF / DOCX / TXT) are parsed to plain text, which can then be
summarised or rewritten with a text-generation pipeline, or queried through a
character n-gram TF-IDF index. Images can be captioned with BLIP and read with
EasyOCR.
"""

import os
import re
import tempfile
import traceback

import gradio as gr
from PyPDF2 import PdfReader
from docx import Document
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import easyocr
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

try:
    import spaces
    HF_SPACES = True
except ImportError:
    HF_SPACES = False


def gpu_decorator(func):
    """Wrap *func* with ``spaces.GPU`` when the ``spaces`` package is importable.

    Outside of that environment the function is returned unchanged.
    """
    if HF_SPACES:
        return spaces.GPU(func)
    return func


# Global state shared between the UI callbacks.
parsed_text = ""
generator = None
blip_processor = None
blip_model = None
ocr_reader = None

# State of the question-answering index.
doc_chunks = []
tfidf_vectorizer = None
tfidf_matrix = None

# Chunks whose cosine similarity to the query is not above this are dropped.
_MIN_SIMILARITY = 0.001


def _contains_chinese(text):
    """Return True if *text* contains any character in U+4E00..U+9FFF."""
    return any('\u4e00' <= char <= '\u9fff' for char in text)


def load_model():
    """Load the text generator, the BLIP captioner and the OCR reader into globals."""
    global generator, blip_processor, blip_model, ocr_reader
    print("正在加载AI模型...")
    # NOTE(review): "gpt2" is the base GPT-2 checkpoint, while the UI text
    # advertises "GPT-2 (1.5B)" — confirm which model is intended.
    generator = pipeline("text-generation", model="gpt2")
    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    ocr_reader = easyocr.Reader(['en', 'ch_sim'])
    print("模型加载完成!")


# ==================== Document analysis ====================

def _read_document(filename):
    """Read *filename* and return ``(text, page_count)``, or None if unsupported.

    The format is chosen by the file extension, compared case-insensitively.
    DOCX and TXT files always report a page count of 1.
    """
    ext = os.path.splitext(filename)[1].lower()
    if ext == '.pdf':
        reader = PdfReader(filename)
        pages = reader.pages
        # extract_text() may yield nothing for image-only pages; treat as empty.
        text = "".join((page.extract_text() or "") + "\n" for page in pages)
        return text, len(pages)
    if ext == '.docx':
        doc = Document(filename)
        return "\n".join(para.text for para in doc.paragraphs), 1
    if ext == '.txt':
        with open(filename, 'r', encoding='utf-8') as f:
            return f.read(), 1
    return None


def extract_text_from_file(file):
    """Parse an uploaded document and store its text in ``parsed_text``.

    Args:
        file: Uploaded file; either an object with a ``name`` attribute holding
            the path, or the path itself. ``None`` means nothing was uploaded.

    Returns:
        A 3-tuple ``(content_preview, summary_placeholder, file_info)``. The
        second element is always an empty string. On failure the first two are
        empty and the third carries the error message.
    """
    global parsed_text, doc_chunks, tfidf_vectorizer, tfidf_matrix
    if file is None:
        return "", "", "请上传文件"

    # Accept both file-like objects exposing .name and plain path strings.
    filename = str(getattr(file, "name", file))
    try:
        parsed = _read_document(filename)
        if parsed is None:
            return "", "", f"❌ 不支持的文件格式:{filename}"
        text, page_count = parsed

        parsed_text = text
        # A Q&A index built for a previously parsed document would otherwise
        # keep answering from the old text, so invalidate it here.
        doc_chunks = []
        tfidf_vectorizer = None
        tfidf_matrix = None

        lang = "中文" if _contains_chinese(text) else "English"

        # Size of the extracted text in UTF-8 bytes (not the on-disk file size).
        file_size = len(text.encode('utf-8'))
        if file_size > 1024 * 1024:
            size_str = f"{file_size / (1024 * 1024):.2f} MB"
        else:
            size_str = f"{file_size / 1024:.2f} KB"

        file_info = f"📄 {filename}\n📏 Size: {size_str} | Pages: {page_count} | Language: {lang}"
        result = f"📖 文档内容(前2000字):\n\n{text[:2000]}"
        return result, "", file_info
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return "", "", f"❌ 解析出错:{str(e)}"


@gpu_decorator
def generate_ai_summary():
    """Summarise the parsed document.

    Text containing Chinese characters is summarised extractively (the first
    five sentences longer than 10 characters); other text is sent to the
    text-generation pipeline with a "Please summarize" prompt.

    Returns:
        The summary, or a user-facing message when no document or model is
        available or when generation fails.
    """
    if not parsed_text:
        return "请先上传并解析文档"
    if generator is None:
        return "AI模型未加载,请重启应用"
    try:
        if _contains_chinese(parsed_text):
            sentences = parsed_text.replace('\n', '。').split('。')
            key_sentences = [s.strip() for s in sentences if len(s.strip()) > 10][:5]
            summary = '。'.join(key_sentences) + '。'
        else:
            input_text = parsed_text[:2000]
            prompt = f"Please summarize:\n\n{input_text}\n\nSummary:"
            result = generator(
                prompt,
                max_new_tokens=300,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=50256,
            )
            summary = result[0]['generated_text']
            # Keep only what follows the last "Summary:" marker.
            if "Summary:" in summary:
                summary = summary.split("Summary:")[-1].strip()
        return summary
    except Exception as e:
        return f"AI摘要生成出错:{str(e)}"


# ==================== Question answering ====================

def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into stripped chunks of up to *chunk_size* characters.

    Consecutive chunks share *overlap* characters. Chunks that are empty after
    stripping are skipped, and splitting stops with the chunk that reaches the
    end of the text, so no tail chunk made only of already-covered overlap is
    produced.

    Args:
        text: Text to split; empty or None yields an empty list.
        chunk_size: Maximum chunk length in characters; must be positive.
        overlap: Characters shared by neighbouring chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        List of chunk strings.

    Raises:
        ValueError: If the sizes would prevent the window from advancing.
    """
    if not text:
        return []
    if chunk_size <= 0 or not 0 <= overlap < chunk_size:
        raise ValueError("chunk_size must be positive and 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(text), step):
        end = start + chunk_size
        piece = text[start:end].strip()
        if piece:
            chunks.append(piece)
        if end >= len(text):
            break
    return chunks


def build_qa_index():
    """Chunk the parsed document and fit a character n-gram TF-IDF index on it.

    Returns:
        A status message for the UI.
    """
    global doc_chunks, tfidf_vectorizer, tfidf_matrix
    if not parsed_text:
        return "请先上传并解析文档"

    doc_chunks = chunk_text(parsed_text)
    if not doc_chunks:
        return "文档内容为空,无法建立索引"

    # token_pattern is only honoured for analyzer='word', so it is not passed.
    tfidf_vectorizer = TfidfVectorizer(
        max_features=5000,
        ngram_range=(1, 2),
        analyzer='char',
    )
    tfidf_matrix = tfidf_vectorizer.fit_transform(doc_chunks)
    return f"✅ 已建立问答索引,共 {len(doc_chunks)} 个文本块"


def search_similar_chunks(query, top_k=3):
    """Return up to *top_k* indexed chunks most similar to *query*.

    Returns:
        List of dicts with keys ``'chunk'``, ``'score'`` and ``'index'``,
        ordered by descending cosine similarity. Empty when no index exists,
        when *top_k* is not positive, or when nothing scores above the
        similarity threshold.
    """
    if top_k <= 0:
        # similarities.argsort()[-0:] would select every chunk.
        return []
    if not doc_chunks or tfidf_vectorizer is None or tfidf_matrix is None:
        return []

    query_vec = tfidf_vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()
    top_indices = similarities.argsort()[-top_k:][::-1]
    return [
        {
            'chunk': doc_chunks[idx],
            'score': float(similarities[idx]),
            'index': int(idx),
        }
        for idx in top_indices
        if similarities[idx] > _MIN_SIMILARITY
    ]


@gpu_decorator
def answer_question(question):
    """Answer *question* from the parsed document.

    The most similar chunks are retrieved from the TF-IDF index (built on
    demand if missing). Questions containing Chinese characters get the
    retrieved passages back; other questions are passed with the passages to
    the text-generation pipeline.

    Returns:
        A 2-tuple ``(answer, relevance_info)``; ``relevance_info`` is empty
        when the request could not be served.
    """
    if not parsed_text:
        return "⚠️ 请先上传并解析文档", ""
    if not question or not question.strip():
        return "⚠️ 请输入您的问题", ""
    if generator is None:
        return "⚠️ AI模型未加载,请重启应用", ""

    # Without an index every search comes back empty, which would be reported
    # as "no relevant content"; build it lazily instead.
    if not doc_chunks or tfidf_vectorizer is None or tfidf_matrix is None:
        build_qa_index()

    similar_chunks = search_similar_chunks(question, top_k=3)
    if not similar_chunks:
        return "⚠️ 未找到与问题相关的内容", ""

    context = "\n\n".join(
        f"[段落{i+1}] {c['chunk']}" for i, c in enumerate(similar_chunks)
    )
    relevance_info = "\n".join(
        f"• 段落{c['index']+1}: 相关度 {c['score']:.1%}" for c in similar_chunks
    )

    if _contains_chinese(question):
        answer = f"📖 参考内容:\n\n{context[:800]}\n\n"
        answer += "⚠️ 中文问答需要更大的多语言模型支持。\n"
        answer += "💡 请参考上方相关段落内容来回答您的问题。"
    else:
        prompt = (
            "Based on the following context, answer the question. \n"
            f"Context: {context[:600]} Question: {question} Answer:"
        )
        try:
            result = generator(
                prompt,
                max_new_tokens=150,
                num_return_sequences=1,
                temperature=0.5,
                do_sample=True,
                pad_token_id=50256,
            )
            generated = result[0]['generated_text']
            if "Answer:" in generated:
                answer = generated.split("Answer:")[-1].strip()
            else:
                answer = generated[-150:]
        except Exception as e:
            answer = f"生成回答时出错:{str(e)}"

    return answer, relevance_info


def clear_qa():
    """Return three empty strings for resetting Q&A text fields."""
    return "", "", ""


@gpu_decorator
def rewrite_document(prompt):
    """Rewrite the parsed document according to the user's instruction *prompt*.

    Documents containing Chinese characters are not rewritten; a notice with
    the first 300 characters is returned instead.

    Returns:
        The rewritten text, or a user-facing message.
    """
    if not parsed_text:
        return "请先上传并解析文档"
    if not prompt:
        return "请输入改写要求"
    if generator is None:
        return "AI模型未加载,请重启应用"
    try:
        if _contains_chinese(parsed_text):
            result = f"📌 改写要求:{prompt}\n\n"
            result += f"📝 原文摘要:\n{parsed_text[:300]}...\n\n"
            result += "⚠️ 中文改写功能需要更大的模型支持,当前GPT-2主要支持英文。"
        else:
            input_text = parsed_text[:2000]
            prompt_text = f"Please rewrite the following text according to this instruction: {prompt}\n\nOriginal text:\n{input_text}\n\nRewritten text:"
            generated = generator(
                prompt_text,
                max_new_tokens=500,
                num_return_sequences=1,
                temperature=0.8,
                do_sample=True,
                pad_token_id=50256,
            )
            result = generated[0]['generated_text']
            # Keep only what follows the last "Rewritten text:" marker.
            if "Rewritten text:" in result:
                result = result.split("Rewritten text:")[-1].strip()
        return result
    except Exception as e:
        return f"改写出错:{str(e)}"


# ==================== Image analysis ====================

@gpu_decorator
def describe_image(image):
    """Caption *image* with the BLIP model and return the formatted description."""
    if image is None:
        return "请上传图片"
    if blip_processor is None or blip_model is None:
        return "BLIP模型未加载,请重启应用"
    try:
        inputs = blip_processor(image, return_tensors="pt")
        out = blip_model.generate(**inputs, max_new_tokens=50)
        description = blip_processor.decode(out[0], skip_special_tokens=True)
        return f"🖼️ 图片描述:\n\n{description}"
    except Exception as e:
        return f"图片描述出错:{str(e)}"


@gpu_decorator
def ocr_image(image):
    """Run EasyOCR on *image* and return one line per detection with its confidence."""
    if image is None:
        return "请上传图片"
    if ocr_reader is None:
        return "OCR模型未加载,请重启应用"
    try:
        img_array = np.array(image)
        result = ocr_reader.readtext(img_array)
        if not result:
            return "未识别到文字内容"
        # Each detection is indexed as [1] = text, [2] = confidence.
        texts = [
            f"{detection[1]} (置信度: {detection[2]:.2%})" for detection in result
        ]
        return "📝 OCR识别结果:\n\n" + "\n".join(texts)
    except Exception as e:
        return f"OCR识别出错:{str(e)}"


def clear_doc():
    """Reset the document state and return blank values for the 12 bound outputs."""
    global parsed_text, doc_chunks, tfidf_vectorizer, tfidf_matrix
    parsed_text = ""
    doc_chunks = []
    tfidf_vectorizer = None
    tfidf_matrix = None
    return None, "", "", "", "", "", "", "", "", None, None, None


def clear_img():
    """Return blank values for the five image-analysis outputs."""
    return None, "", "", None, None


def download_text(text, filename="result"):
    """Write *text* to a temporary UTF-8 ``.txt`` file and return its path.

    Args:
        text: Content to save; empty or whitespace-only text yields None.
        filename: Prefix for the temp file; characters other than
            alphanumerics, ``_`` and ``-`` are dropped and the rest is
            truncated to 30 characters.

    Returns:
        Path of the created file, or None when there is nothing to save or
        the write fails.
    """
    try:
        if not text or text.strip() == "":
            return None
        safe_name = "".join(c for c in filename if c.isalnum() or c in ('_', '-'))[:30]
        # delete=False: the file must outlive this call so it can be served.
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='.txt',
            delete=False,
            encoding='utf-8',
            prefix=f"{safe_name}_",
        ) as tmp:
            tmp.write(text)
        print(f"Download file created: {tmp.name}, size: {os.path.getsize(tmp.name)}")
        return tmp.name
    except Exception as e:
        print(f"Download error: {e}")
        traceback.print_exc()
        return None


# Load the models at import time so the callbacks below can use them.
load_model()

# Build the Gradio interface.
# NOTE(review): the nesting of the layout below was reconstructed from a
# source whose indentation was lost — confirm the placement of the two
# "Clear" buttons in particular.
with gr.Blocks(
    title="DocuMind - Smart Document Analyzer"
) as demo:
    gr.HTML("""

🧠 DocuMind

Smart Document Analyzer | 智能文档分析助手

""")
    gr.HTML("""

📌 How to Use | 使用说明

📄 Document Analysis: Upload Document → Parse → Summary/Rewrite/Q&A → Download

🖼️ Image Analysis: Upload Image → Choose Analysis Type → View Results → Download

🤖 AI Models: GPT-2 (1.5B) + BLIP (0.4B) | Tools: EasyOCR + TF-IDF | ✅ Hackathon Compliant (≤32B)

""")
    with gr.Tabs() as main_tabs:
        # ==================== Document analysis ====================
        with gr.TabItem("📄 Document Analysis | 文档分析", id="doc"):
            gr.HTML("""

📌 How to Use: Upload Document → Parse → Summary/Rewrite/Q&A → Download

🤖 AI Models: GPT-2 (1.5B) - Summary/Rewrite/Q&A | TF-IDF - Document search | ✅ Hackathon Compliant

""")
            with gr.Row():
                with gr.Column(scale=4):
                    gr.Markdown("#### Step 1: 📤 Upload")
                    file_input = gr.File(
                        label="Upload PDF/Word/TXT",
                        file_types=[".pdf", ".docx", ".txt"],
                        height=150
                    )
                    file_info = gr.Textbox(label="File Info | 文件信息", lines=2, interactive=False)
                    gr.Markdown("#### Step 2: 📤 Parse")
                    submit_btn = gr.Button("📤 Parse Document | 解析文档", variant="primary", size="lg")
                with gr.Column(scale=6):
                    gr.Markdown("#### Step 3: 📝 Results")
                    with gr.Tabs():
                        with gr.TabItem("📝 Content | 内容"):
                            output_text = gr.Textbox(label="Document Content | 文档内容", lines=10, interactive=False)
                            download_content = gr.Button("⬇️ Download Content | 下载内容", size="sm")
                            download_content_file = gr.File(label="Download Content | 下载内容")
                        with gr.TabItem("🤖 Summary | 摘要"):
                            summary_text = gr.Textbox(label="AI Summary | AI摘要", lines=8, interactive=False)
                            summary_btn = gr.Button("🤖 Generate Summary | 生成摘要", variant="secondary", size="sm")
                            download_summary = gr.Button("⬇️ Download Summary | 下载摘要", size="sm")
                            download_summary_file = gr.File(label="Download Summary | 下载摘要")
                        with gr.TabItem("✍️ Rewrite | 改写"):
                            custom_prompt = gr.Textbox(
                                label="📝 Custom Prompt | 自定义提示词",
                                placeholder="e.g., Rewrite in formal English / Simplify for kids...",
                                lines=2
                            )
                            rewrite_btn = gr.Button("✍️ Rewrite | 改写", variant="secondary", size="sm")
                            rewrite_output = gr.Textbox(label="Rewrite Result | 改写结果", lines=6, interactive=False)
                            download_rewrite = gr.Button("⬇️ Download Rewrite | 下载改写", size="sm")
                            download_rewrite_file = gr.File(label="Download Rewrite | 下载改写")
                        with gr.TabItem("💬 Q&A | 问答"):
                            gr.HTML("""

🤖 AI Model: GPT-2 (1.5B) + TF-IDF | Document Q&A | ✅ Hackathon Compliant

📌 Steps: 1. Click Build Index → 2. Enter question → 3. Click Get Answer

""")
                            qa_index_btn = gr.Button("📇 Build Index | 建立索引", variant="secondary", size="sm")
                            qa_index_info = gr.Textbox(label="Index Status | 索引状态", lines=1, interactive=False)
                            qa_question = gr.Textbox(
                                label="❓ Your Question | 您的问题",
                                placeholder="e.g., What is the main topic? / 这篇文档的主要内容是什么?",
                                lines=2
                            )
                            qa_btn = gr.Button("💬 Get Answer | 获取回答", variant="primary", size="sm")
                            qa_relevance = gr.Textbox(label="Relevance | 相关段落", lines=2, interactive=False)
                            qa_answer = gr.Textbox(label="Answer | 回答", lines=6, interactive=False)
                    clear_doc_btn = gr.Button("🗑️ Clear | 清除", variant="stop", size="sm")

        # ==================== Image analysis ====================
        with gr.TabItem("🖼️ Image Analysis | 图片分析", id="img"):
            gr.HTML("""

📌 How to Use: Upload Image → Choose Analysis Type → View Results → Download

🤖 AI Models: BLIP (0.4B) - Image description | EasyOCR - Text recognition | ✅ Hackathon Compliant

""")
            with gr.Row():
                with gr.Column(scale=4):
                    gr.Markdown("#### Step 1: 📤 Upload Image")
                    image_input = gr.Image(
                        label="Upload Image | 上传图片",
                        type="pil",
                        sources=["upload", "clipboard", "webcam"],
                        height=300
                    )
                    gr.HTML("""

📷 图片输入方式说明 | Image Input Methods:

1. 上传 (Upload): 点击上传按钮,选择本地图片文件

2. 剪贴板 (Clipboard): 先复制图片(Ctrl+C),然后点击剪贴板图标粘贴

3. 摄像头 (Webcam): 点击摄像头图标 → 允许权限 → 对准文档 → 拍照

⚠️ 注意:请先完成一种输入方式,再使用另一种。不要同时打开多个输入对话框。

""")
                    gr.Markdown("#### Step 2: 🔍 Choose Analysis")
                    analyze_btn = gr.Button("🔍 Analyze Image | 分析图片", variant="primary", size="lg")
                with gr.Column(scale=6):
                    gr.Markdown("#### Step 3: 📝 Results")
                    with gr.Tabs():
                        with gr.TabItem("📝 Description | 图片描述"):
                            gr.HTML("""

🤖 Model: BLIP (0.4B) | AI Image Captioning | ✅ Hackathon Compliant

""")
                            desc_output = gr.Textbox(label="Image Description | 图片描述", lines=8, interactive=False)
                            download_desc = gr.Button("⬇️ Download Description | 下载描述", size="sm")
                            download_desc_file = gr.File(label="image_description.txt")
                        with gr.TabItem("🔍 OCR | 文字识别"):
                            gr.HTML("""

🔧 Tool: EasyOCR | Text extraction from images

""")
                            ocr_output = gr.Textbox(label="OCR Results | 识别结果", lines=8, interactive=False)
                            download_ocr = gr.Button("⬇️ Download OCR Result | 下载识别结果", size="sm")
                            download_ocr_file = gr.File(label="ocr_result.txt")
                    clear_img_btn = gr.Button("🗑️ Clear | 清除", variant="stop", size="sm")

    gr.HTML("""

📧 Built for Build Small Hackathon | 为黑客松而建

⏱️ Deadline: June 15, 2026 | 截止日期:2026年6月15日

© 2026 Binge-666 | github.com/Binge-666

""")

    # ==================== Event wiring ====================
    # Document analysis
    submit_btn.click(
        fn=extract_text_from_file,
        inputs=[file_input],
        outputs=[output_text, summary_text, file_info]
    )
    summary_btn.click(
        fn=generate_ai_summary,
        inputs=[],
        outputs=[summary_text]
    )
    rewrite_btn.click(
        fn=rewrite_document,
        inputs=[custom_prompt],
        outputs=[rewrite_output]
    )
    qa_index_btn.click(
        fn=build_qa_index,
        inputs=[],
        outputs=[qa_index_info]
    )
    qa_btn.click(
        fn=answer_question,
        inputs=[qa_question],
        outputs=[qa_answer, qa_relevance]
    )
    clear_doc_btn.click(
        fn=clear_doc,
        inputs=[],
        outputs=[file_input, file_info, output_text, summary_text, custom_prompt,
                 qa_index_info, qa_question, qa_answer, rewrite_output,
                 download_content_file, download_summary_file, download_rewrite_file]
    )

    # Image analysis: one click runs both the captioner and the OCR reader.
    analyze_btn.click(
        fn=lambda img: [describe_image(img), ocr_image(img)],
        inputs=[image_input],
        outputs=[desc_output, ocr_output]
    )
    clear_img_btn.click(
        fn=clear_img,
        inputs=[],
        outputs=[image_input, desc_output, ocr_output, download_desc_file, download_ocr_file]
    )

    # Downloads
    download_desc.click(
        fn=download_text,
        inputs=[desc_output],
        outputs=[download_desc_file]
    )
    download_ocr.click(
        fn=download_text,
        inputs=[ocr_output],
        outputs=[download_ocr_file]
    )
    download_content.click(
        fn=download_text,
        inputs=[output_text],
        outputs=[download_content_file]
    )
    download_summary.click(
        fn=download_text,
        inputs=[summary_text],
        outputs=[download_summary_file]
    )
    download_rewrite.click(
        fn=download_text,
        inputs=[rewrite_output],
        outputs=[download_rewrite_file]
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        theme=gr.themes.Base(
            primary_hue="indigo",
            secondary_hue="slate",
            neutral_hue="slate"
        )
    )