Spaces:

build-small-hackathon
/

documind

Running

App Files Files Community

documind / app.py

BingeAIGC

Upload app.py

de1fb11 verified 22 days ago

Raw

History Blame Contribute Delete

26 kB

	import gradio as gr
	from PyPDF2 import PdfReader
	from docx import Document
	from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
	from PIL import Image
	import easyocr
	import tempfile
	import os
	import re
	import numpy as np
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity

	try:
	    import spaces
	except ImportError:
	    # The optional ``spaces`` helper package is not installed.
	    HF_SPACES = False
	else:
	    HF_SPACES = True


	def gpu_decorator(func):
	    """Wrap ``func`` with ``spaces.GPU`` when the ``spaces`` package imported.

	    When ``spaces`` is unavailable the function is returned unchanged, so the
	    decorated handlers still work.
	    """
	    return spaces.GPU(func) if HF_SPACES else func

	# Module-level state shared by the handlers below.
	parsed_text = ""  # full text of the most recently parsed document
	generator = None  # text-generation pipeline, assigned by load_model()
	blip_processor = None  # BLIP processor, assigned by load_model()
	blip_model = None  # BLIP captioning model, assigned by load_model()
	ocr_reader = None  # EasyOCR reader, assigned by load_model()

	# State used by the question-answering feature.
	doc_chunks = []  # text chunks, assigned by build_qa_index()
	tfidf_vectorizer = None  # fitted TfidfVectorizer, assigned by build_qa_index()
	tfidf_matrix = None  # TF-IDF matrix of doc_chunks, assigned by build_qa_index()

	def load_model():
	    """Load the AI models into the module-level globals.

	    Populates ``generator`` (GPT-2 text-generation pipeline),
	    ``blip_processor`` / ``blip_model`` (BLIP image captioning) and
	    ``ocr_reader`` (EasyOCR for English and Simplified Chinese), printing a
	    progress message before and after.
	    """
	    global generator, blip_processor, blip_model, ocr_reader

	    # The BLIP processor and model are loaded from the same checkpoint.
	    blip_checkpoint = "Salesforce/blip-image-captioning-base"
	    ocr_languages = ['en', 'ch_sim']

	    print("正在加载AI模型...")
	    generator = pipeline("text-generation", model="gpt2")
	    blip_processor = BlipProcessor.from_pretrained(blip_checkpoint)
	    blip_model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint)
	    ocr_reader = easyocr.Reader(ocr_languages)
	    print("模型加载完成！")

	# ==================== 文档分析功能 ====================
	def extract_text_from_file(file):
	    """Parse an uploaded PDF, DOCX or TXT file and cache its text.

	    Args:
	        file: The uploaded file, either an object exposing its path as
	            ``.name`` or a plain path string; ``None`` when nothing was
	            uploaded.

	    Returns:
	        A ``(content, summary, file_info)`` tuple of strings. ``content`` is
	        a preview of the first 2000 characters, ``summary`` is always empty,
	        and ``file_info`` holds the file statistics or an error message.
	        On any failure ``content`` is empty and ``file_info`` carries the
	        error text; no exception propagates to the caller.

	    Side effects:
	        On success the full text is stored in the global ``parsed_text`` and
	        any question-answering index built for an earlier document is
	        discarded.
	    """
	    global parsed_text, doc_chunks, tfidf_vectorizer, tfidf_matrix

	    if file is None:
	        return "", "", "请上传文件"

	    # Accept both file-like objects (``.name``) and plain path strings.
	    filename = getattr(file, "name", file)

	    try:
	        # Compare extensions case-insensitively so "REPORT.PDF" is accepted.
	        lowered = str(filename).lower()

	        if lowered.endswith('.pdf'):
	            reader = PdfReader(filename)
	            page_count = len(reader.pages)
	            # A page without extractable text must not break concatenation.
	            text = "".join(
	                (page.extract_text() or "") + "\n" for page in reader.pages
	            )
	        elif lowered.endswith('.docx'):
	            doc = Document(filename)
	            text = "\n".join(para.text for para in doc.paragraphs)
	            page_count = 1
	        elif lowered.endswith('.txt'):
	            with open(filename, 'r', encoding='utf-8') as f:
	                text = f.read()
	            page_count = 1
	        else:
	            return "", "", f"❌ 不支持的文件格式：{filename}"

	        parsed_text = text
	        # The old index describes the previous document; drop it so questions
	        # are never answered from stale chunks.
	        doc_chunks = []
	        tfidf_vectorizer = None
	        tfidf_matrix = None

	        # Any CJK unified ideograph marks the document as Chinese.
	        has_chinese = any('\u4e00' <= char <= '\u9fff' for char in text)
	        lang = "中文" if has_chinese else "English"

	        # The reported size is that of the extracted text encoded as UTF-8.
	        file_size = len(text.encode('utf-8'))
	        if file_size > 1024 * 1024:
	            size_str = f"{file_size / (1024 * 1024):.2f} MB"
	        else:
	            size_str = f"{file_size / 1024:.2f} KB"

	        file_info = (
	            f"📄 {filename}\n"
	            f"📏 Size: {size_str} | Pages: {page_count} | Language: {lang}"
	        )
	        result = f"📖 文档内容（前2000字）：\n\n{text[:2000]}"

	        return result, "", file_info

	    except Exception as e:
	        return "", "", f"❌ 解析出错：{str(e)}"

	@gpu_decorator
	def generate_ai_summary():
	    """Summarize the currently parsed document.

	    Text containing Chinese characters is summarized extractively: the first
	    five sentences longer than ten characters are joined with "。". Other text
	    is sent (first 2000 characters) to the GPT-2 pipeline and whatever follows
	    the last "Summary:" marker is returned.

	    Returns:
	        The summary, or a message when no document is parsed, the model is
	        not loaded, or generation fails.
	    """
	    global parsed_text, generator

	    if not parsed_text:
	        return "请先上传并解析文档"
	    if generator is None:
	        return "AI模型未加载，请重启应用"

	    try:
	        if any('\u4e00' <= ch <= '\u9fff' for ch in parsed_text):
	            # Line breaks count as sentence boundaries too.
	            pieces = (p.strip() for p in parsed_text.replace('\n', '。').split('。'))
	            leading = [p for p in pieces if len(p) > 10][:5]
	            return '。'.join(leading) + '。'

	        excerpt = parsed_text[:2000]
	        prompt = f"Please summarize:\n\n{excerpt}\n\nSummary:"
	        outputs = generator(
	            prompt,
	            max_new_tokens=300,
	            num_return_sequences=1,
	            temperature=0.7,
	            do_sample=True,
	            pad_token_id=50256,
	        )

	        summary = outputs[0]['generated_text']
	        # Keep only what follows the last "Summary:" marker, when present.
	        _, marker, tail = summary.rpartition("Summary:")
	        if marker:
	            summary = tail.strip()
	        return summary
	    except Exception as e:
	        return f"AI摘要生成出错：{str(e)}"

	# ==================== 问答功能 ====================
	def chunk_text(text, chunk_size=500, overlap=50):
	    """Split ``text`` into overlapping, whitespace-stripped chunks.

	    Args:
	        text: The text to split; empty or ``None`` yields ``[]``.
	        chunk_size: Maximum number of characters per chunk; must be positive.
	        overlap: Number of characters shared by consecutive chunks; must be
	            smaller than ``chunk_size`` so the window always advances.

	    Returns:
	        The non-blank chunks in document order.

	    Raises:
	        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
	            smaller than ``chunk_size`` (either would prevent the window
	            from advancing).
	    """
	    if not text:
	        return []
	    if chunk_size <= 0:
	        raise ValueError("chunk_size must be positive")
	    if overlap >= chunk_size:
	        raise ValueError("overlap must be smaller than chunk_size")

	    step = chunk_size - overlap
	    total = len(text)
	    chunks = []
	    for start in range(0, total, step):
	        end = start + chunk_size
	        piece = text[start:end].strip()
	        if piece:
	            chunks.append(piece)
	        # Once a window reaches the end of the text, a further window would
	        # only repeat the tail already contained in this chunk.
	        if end >= total:
	            break
	    return chunks

	def build_qa_index():
	    """Build the TF-IDF index used for document question answering.

	    Splits the global ``parsed_text`` with ``chunk_text`` and fits a
	    character n-gram ``TfidfVectorizer`` on the chunks, storing the chunks,
	    the vectorizer and the resulting matrix in module-level globals.

	    Returns:
	        A status message: the number of indexed chunks on success, or the
	        reason no index was built.
	    """
	    global parsed_text, doc_chunks, tfidf_vectorizer, tfidf_matrix

	    if not parsed_text:
	        return "请先上传并解析文档"

	    doc_chunks = chunk_text(parsed_text)

	    if not doc_chunks:
	        return "文档内容为空，无法建立索引"

	    # Features are character 1- and 2-grams. ``token_pattern`` is not passed:
	    # it only applies to analyzer='word', and supplying it with a character
	    # analyzer makes scikit-learn warn that the parameter is ignored.
	    tfidf_vectorizer = TfidfVectorizer(
	        max_features=5000,
	        ngram_range=(1, 2),
	        analyzer='char',
	    )
	    tfidf_matrix = tfidf_vectorizer.fit_transform(doc_chunks)

	    return f"✅ 已建立问答索引，共 {len(doc_chunks)} 个文本块"

	def search_similar_chunks(query, top_k=3):
	    """Return the indexed chunks most similar to ``query``.

	    Args:
	        query: The question text to match against the index.
	        top_k: How many of the highest-scoring chunks to consider.

	    Returns:
	        A list of dicts with keys ``'chunk'`` (text), ``'score'`` (cosine
	        similarity as a float) and ``'index'`` (position in ``doc_chunks``),
	        best match first. Chunks scoring 0.001 or less are dropped, and the
	        list is empty when no index has been built.
	    """
	    global doc_chunks, tfidf_vectorizer, tfidf_matrix

	    index_ready = (
	        bool(doc_chunks)
	        and tfidf_vectorizer is not None
	        and tfidf_matrix is not None
	    )
	    if not index_ready:
	        return []

	    scores = cosine_similarity(
	        tfidf_vectorizer.transform([query]), tfidf_matrix
	    ).flatten()

	    # argsort is ascending: take the last top_k positions, best first.
	    ranked = scores.argsort()[-top_k:][::-1]

	    return [
	        {
	            'chunk': doc_chunks[pos],
	            'score': float(scores[pos]),
	            'index': int(pos),
	        }
	        for pos in ranked
	        if scores[pos] > 0.001
	    ]

	@gpu_decorator
	def answer_question(question):
	    """Answer a question from the indexed document content.

	    The most similar chunks are retrieved with ``search_similar_chunks``.
	    For questions containing Chinese characters the retrieved passages are
	    returned as reference material; otherwise they are put into a prompt
	    for the GPT-2 pipeline and the text after the last "Answer:" is returned.

	    Args:
	        question: The user's question; ``None``, empty or whitespace-only
	            input is rejected.

	    Returns:
	        An ``(answer, relevance_info)`` tuple of strings. ``relevance_info``
	        lists the similarity score of each retrieved chunk and is empty when
	        the request could not be processed.
	    """
	    global generator, parsed_text, doc_chunks, tfidf_vectorizer, tfidf_matrix

	    if not parsed_text:
	        return "⚠️ 请先上传并解析文档", ""

	    # Whitespace-only input is treated like an empty question.
	    question = (question or "").strip()
	    if not question:
	        return "⚠️ 请输入您的问题", ""

	    if generator is None:
	        return "⚠️ AI模型未加载，请重启应用", ""

	    # Without an index the search below always comes back empty; report the
	    # real cause instead of "no relevant content found".
	    if not doc_chunks or tfidf_vectorizer is None or tfidf_matrix is None:
	        return "⚠️ 请先建立问答索引（Build Index）", ""

	    similar_chunks = search_similar_chunks(question, top_k=3)

	    if not similar_chunks:
	        return "⚠️ 未找到与问题相关的内容", ""

	    # Label passages with their chunk number in both outputs so the
	    # relevance list can be matched against the quoted context.
	    context = "\n\n".join(
	        f"[段落{c['index']+1}] {c['chunk']}" for c in similar_chunks
	    )
	    relevance_info = "\n".join(
	        f"• 段落{c['index']+1}: 相关度 {c['score']:.1%}" for c in similar_chunks
	    )

	    has_chinese = any('\u4e00' <= char <= '\u9fff' for char in question)

	    if has_chinese:
	        # No generation for Chinese: only the retrieved passages are shown.
	        answer = f"📖 参考内容：\n\n{context[:800]}\n\n"
	        answer += "⚠️ 中文问答需要更大的多语言模型支持。\n"
	        answer += "💡 请参考上方相关段落内容来回答您的问题。"
	    else:
	        prompt = f"""Based on the following context, answer the question.

	Context:
	{context[:600]}

	Question: {question}

	Answer:"""

	        try:
	            result = generator(
	                prompt,
	                max_new_tokens=150,
	                num_return_sequences=1,
	                temperature=0.5,
	                do_sample=True,
	                pad_token_id=50256
	            )

	            generated = result[0]['generated_text']
	            if "Answer:" in generated:
	                answer = generated.split("Answer:")[-1].strip()
	            else:
	                answer = generated[-150:]
	        except Exception as e:
	            answer = f"生成回答时出错：{str(e)}"

	    return answer, relevance_info

	def clear_qa():
	    """Return three empty strings, used to reset the Q&A content."""
	    return ("",) * 3

	@gpu_decorator
	def rewrite_document(prompt):
	    """Rewrite the parsed document according to a user instruction.

	    Documents containing Chinese characters are not rewritten; a notice with
	    the instruction and the first 300 characters is returned instead. Other
	    documents (first 2000 characters) are sent to the GPT-2 pipeline and the
	    text after the last "Rewritten text:" marker is returned.

	    Args:
	        prompt: The user's rewriting instruction.

	    Returns:
	        The rewritten text, or a message when no document is parsed, the
	        instruction is empty, the model is not loaded, or generation fails.
	    """
	    global parsed_text, generator

	    if not parsed_text:
	        return "请先上传并解析文档"
	    if not prompt:
	        return "请输入改写要求"
	    if generator is None:
	        return "AI模型未加载，请重启应用"

	    try:
	        if any('\u4e00' <= ch <= '\u9fff' for ch in parsed_text):
	            return "".join([
	                f"📌 改写要求：{prompt}\n\n",
	                f"📝 原文摘要：\n{parsed_text[:300]}...\n\n",
	                "⚠️ 中文改写功能需要更大的模型支持，当前GPT-2主要支持英文。",
	            ])

	        input_text = parsed_text[:2000]
	        prompt_text = f"Please rewrite the following text according to this instruction: {prompt}\n\nOriginal text:\n{input_text}\n\nRewritten text:"

	        outputs = generator(
	            prompt_text,
	            max_new_tokens=500,
	            num_return_sequences=1,
	            temperature=0.8,
	            do_sample=True,
	            pad_token_id=50256,
	        )

	        rewritten = outputs[0]['generated_text']
	        # Keep only what follows the last marker, when present.
	        _, marker, tail = rewritten.rpartition("Rewritten text:")
	        if marker:
	            rewritten = tail.strip()
	        return rewritten
	    except Exception as e:
	        return f"改写出错：{str(e)}"

	# ==================== 图片分析功能 ====================
	@gpu_decorator
	def describe_image(image):
	    """Generate a caption for ``image`` with the BLIP model.

	    Args:
	        image: The image to describe, or ``None`` when nothing was uploaded.

	    Returns:
	        The caption prefixed with a heading, or a message when no image was
	        given, the model is not loaded, or captioning fails.
	    """
	    global blip_processor, blip_model

	    if image is None:
	        return "请上传图片"
	    if not (blip_processor is not None and blip_model is not None):
	        return "BLIP模型未加载，请重启应用"

	    try:
	        model_inputs = blip_processor(image, return_tensors="pt")
	        token_ids = blip_model.generate(**model_inputs, max_new_tokens=50)
	        caption = blip_processor.decode(token_ids[0], skip_special_tokens=True)
	        return f"🖼️ 图片描述：\n\n{caption}"
	    except Exception as e:
	        return f"图片描述出错：{str(e)}"

	@gpu_decorator
	def ocr_image(image):
	    """Recognize text in ``image`` with EasyOCR.

	    Args:
	        image: The image to scan, or ``None`` when nothing was uploaded.

	    Returns:
	        One line per detection with its confidence, under a heading; or a
	        message when no image was given, the reader is not loaded, nothing
	        was recognized, or recognition fails.
	    """
	    global ocr_reader

	    if image is None:
	        return "请上传图片"
	    if ocr_reader is None:
	        return "OCR模型未加载，请重启应用"

	    try:
	        import numpy as np

	        detections = ocr_reader.readtext(np.array(image))
	        if not detections:
	            return "未识别到文字内容"

	        # Each detection is indexed as [1] = text, [2] = confidence.
	        lines = [f"{found[1]} (置信度: {found[2]:.2%})" for found in detections]
	        return "📝 OCR识别结果：\n\n" + "\n".join(lines)
	    except Exception as e:
	        return f"OCR识别出错：{str(e)}"

	def clear_doc():
	    """Reset the document state and return blank values for the doc widgets.

	    Clears the cached text and the question-answering index, then returns a
	    12-tuple: ``None``, eight empty strings, and three ``None`` values.
	    """
	    global parsed_text, doc_chunks, tfidf_vectorizer, tfidf_matrix

	    parsed_text, doc_chunks = "", []
	    tfidf_vectorizer = tfidf_matrix = None

	    return (None, *[""] * 8, None, None, None)

	def clear_img():
	    """Return blank values for the five image-analysis widgets."""
	    no_file = None
	    return (no_file, "", "", no_file, no_file)

	def download_text(text, filename="result"):
	    """Write ``text`` to a temporary ``.txt`` file and return its path.

	    Args:
	        text: The content to save; empty or whitespace-only text is skipped.
	        filename: Base name for the file. Only alphanumerics, ``_`` and ``-``
	            are kept, truncated to 30 characters, and used as the prefix of
	            the temporary file name.

	    Returns:
	        The path of the written UTF-8 file, or ``None`` when there is nothing
	        to save or writing fails. The file is not deleted automatically.
	    """
	    path = None
	    try:
	        if not text or text.strip() == "":
	            return None
	        safe_name = "".join(c for c in filename if c.isalnum() or c in ('_', '-'))[:30]
	        # The context manager closes the handle even when write() fails;
	        # delete=False keeps the file on disk for the caller.
	        with tempfile.NamedTemporaryFile(
	            mode='w',
	            suffix='.txt',
	            delete=False,
	            encoding='utf-8',
	            prefix=f"{safe_name}_",
	        ) as tmp:
	            path = tmp.name
	            tmp.write(text)
	        print(f"Download file created: {path}, size: {os.path.getsize(path)}")
	        return path
	    except Exception as e:
	        print(f"Download error: {e}")
	        import traceback
	        traceback.print_exc()
	        # Do not leave a partially written file behind.
	        if path is not None:
	            try:
	                os.remove(path)
	            except OSError:
	                pass
	        return None

	# Load the models once at import time, before the interface is built.
	load_model()

	# Build the Gradio interface.
	# NOTE(review): the nesting below was reconstructed from the component order;
	# confirm the placement of the two "Clear" buttons and the footer.
	# Labels use a plain "|" separator: "\|" is an invalid escape sequence that
	# rendered a stray backslash. The GPT-2 size label matches the "gpt2"
	# checkpoint loaded by load_model().
	with gr.Blocks(
	    title="DocuMind - Smart Document Analyzer"
	) as demo:
	    gr.HTML("""
	<div style="text-align: center; margin-bottom: 15px;">
	<h1 style="margin: 0; color: #e2e8f0;">🧠 DocuMind</h1>
	<p style="color: #94a3b8; margin: 5px 0 0 0; font-size: 16px;">Smart Document Analyzer | 智能文档分析助手</p>
	</div>
	""")

	    gr.HTML("""
	<div style="padding: 8px 12px; background: rgba(99, 102, 241, 0.1); border-radius: 6px; margin-bottom: 10px; border-left: 3px solid #6366f1;">
	<p style="margin: 0 0 5px 0; color: #e2e8f0; font-weight: bold;">📌 How to Use | 使用说明</p>
	<p style="margin: 0 0 3px 0; color: #94a3b8; font-size: 12px;">
	📄 <strong>Document Analysis:</strong> Upload Document → Parse → Summary/Rewrite/Q&A → Download
	</p>
	<p style="margin: 0 0 3px 0; color: #94a3b8; font-size: 12px;">
	🖼️ <strong>Image Analysis:</strong> Upload Image → Choose Analysis Type → View Results → Download
	</p>
	<p style="margin: 0 0 3px 0; color: #22c55e; font-size: 11px;">
	🤖 AI Models: GPT-2 (124M) + BLIP (0.4B) | Tools: EasyOCR + TF-IDF | ✅ Hackathon Compliant (≤32B)
	</p>
	</div>
	""")

	    with gr.Tabs() as main_tabs:
	        # ==================== Document analysis ====================
	        with gr.TabItem("📄 Document Analysis | 文档分析", id="doc"):
	            gr.HTML("""
	<div style="padding: 8px 12px; background: rgba(99, 102, 241, 0.1); border-radius: 6px; margin-bottom: 10px;">
	<p style="margin: 0; color: #94a3b8; font-size: 13px;">
	<strong>📌 How to Use:</strong> Upload Document → Parse → Summary/Rewrite/Q&A → Download
	</p>
	<p style="margin: 3px 0 0 0; color: #22c55e; font-size: 11px;">
	🤖 AI Models: GPT-2 (124M) - Summary/Rewrite/Q&A | TF-IDF - Document search | ✅ Hackathon Compliant
	</p>
	</div>
	""")

	            with gr.Row():
	                with gr.Column(scale=4):
	                    gr.Markdown("#### Step 1: 📤 Upload")
	                    file_input = gr.File(
	                        label="Upload PDF/Word/TXT",
	                        file_types=[".pdf", ".docx", ".txt"],
	                        height=150
	                    )
	                    file_info = gr.Textbox(label="File Info | 文件信息", lines=2, interactive=False)

	                    gr.Markdown("#### Step 2: 📤 Parse")
	                    submit_btn = gr.Button("📤 Parse Document | 解析文档", variant="primary", size="lg")

	                with gr.Column(scale=6):
	                    gr.Markdown("#### Step 3: 📝 Results")
	                    with gr.Tabs():
	                        with gr.TabItem("📝 Content | 内容"):
	                            output_text = gr.Textbox(label="Document Content | 文档内容", lines=10, interactive=False)
	                            download_content = gr.Button("⬇️ Download Content | 下载内容", size="sm")
	                            download_content_file = gr.File(label="Download Content | 下载内容")

	                        with gr.TabItem("🤖 Summary | 摘要"):
	                            summary_text = gr.Textbox(label="AI Summary | AI摘要", lines=8, interactive=False)
	                            summary_btn = gr.Button("🤖 Generate Summary | 生成摘要", variant="secondary", size="sm")
	                            download_summary = gr.Button("⬇️ Download Summary | 下载摘要", size="sm")
	                            download_summary_file = gr.File(label="Download Summary | 下载摘要")

	                        with gr.TabItem("✍️ Rewrite | 改写"):
	                            custom_prompt = gr.Textbox(
	                                label="📝 Custom Prompt | 自定义提示词",
	                                placeholder="e.g., Rewrite in formal English / Simplify for kids...",
	                                lines=2
	                            )
	                            rewrite_btn = gr.Button("✍️ Rewrite | 改写", variant="secondary", size="sm")
	                            rewrite_output = gr.Textbox(label="Rewrite Result | 改写结果", lines=6, interactive=False)
	                            download_rewrite = gr.Button("⬇️ Download Rewrite | 下载改写", size="sm")
	                            download_rewrite_file = gr.File(label="Download Rewrite | 下载改写")

	                        with gr.TabItem("💬 Q&A | 问答"):
	                            gr.HTML("""
	<div style="padding: 6px 10px; background: rgba(34, 197, 94, 0.1); border-radius: 4px; margin-bottom: 8px;">
	<p style="margin: 0 0 3px 0; color: #22c55e; font-size: 11px;">
	🤖 AI Model: GPT-2 (124M) + TF-IDF | Document Q&A | ✅ Hackathon Compliant
	</p>
	<p style="margin: 0; color: #94a3b8; font-size: 11px;">
	<strong>📌 Steps:</strong> 1. Click Build Index → 2. Enter question → 3. Click Get Answer
	</p>
	</div>
	""")
	                            qa_index_btn = gr.Button("📇 Build Index | 建立索引", variant="secondary", size="sm")
	                            qa_index_info = gr.Textbox(label="Index Status | 索引状态", lines=1, interactive=False)
	                            qa_question = gr.Textbox(
	                                label="❓ Your Question | 您的问题",
	                                placeholder="e.g., What is the main topic? / 这篇文档的主要内容是什么？",
	                                lines=2
	                            )
	                            qa_btn = gr.Button("💬 Get Answer | 获取回答", variant="primary", size="sm")
	                            qa_relevance = gr.Textbox(label="Relevance | 相关段落", lines=2, interactive=False)
	                            qa_answer = gr.Textbox(label="Answer | 回答", lines=6, interactive=False)

	            clear_doc_btn = gr.Button("🗑️ Clear | 清除", variant="stop", size="sm")

	        # ==================== Image analysis ====================
	        with gr.TabItem("🖼️ Image Analysis | 图片分析", id="img"):
	            gr.HTML("""
	<div style="padding: 8px 12px; background: rgba(99, 102, 241, 0.1); border-radius: 6px; margin-bottom: 10px;">
	<p style="margin: 0; color: #94a3b8; font-size: 13px;">
	<strong>📌 How to Use:</strong> Upload Image → Choose Analysis Type → View Results → Download
	</p>
	<p style="margin: 3px 0 0 0; color: #22c55e; font-size: 11px;">
	🤖 AI Models: BLIP (0.4B) - Image description | EasyOCR - Text recognition | ✅ Hackathon Compliant
	</p>
	</div>
	""")

	            with gr.Row():
	                with gr.Column(scale=4):
	                    gr.Markdown("#### Step 1: 📤 Upload Image")
	                    image_input = gr.Image(
	                        label="Upload Image | 上传图片",
	                        type="pil",
	                        sources=["upload", "clipboard", "webcam"],
	                        height=300
	                    )

	                    gr.HTML("""
	<div style="padding: 8px 12px; background: rgba(234, 179, 8, 0.1); border-radius: 4px; margin-bottom: 8px;">
	<p style="margin: 0 0 5px 0; color: #eab308; font-size: 12px; font-weight: bold;">📷 图片输入方式说明 | Image Input Methods:</p>
	<p style="margin: 0 0 3px 0; color: #e2e8f0; font-size: 11px;">
	<strong>1. 上传 (Upload):</strong> 点击上传按钮，选择本地图片文件
	</p>
	<p style="margin: 0 0 3px 0; color: #e2e8f0; font-size: 11px;">
	<strong>2. 剪贴板 (Clipboard):</strong> 先复制图片（Ctrl+C），然后点击剪贴板图标粘贴
	</p>
	<p style="margin: 0 0 3px 0; color: #e2e8f0; font-size: 11px;">
	<strong>3. 摄像头 (Webcam):</strong> 点击摄像头图标 → 允许权限 → 对准文档 → 拍照
	</p>
	<p style="margin: 0; color: #ef4444; font-size: 10px;">
	⚠️ 注意：请先完成一种输入方式，再使用另一种。不要同时打开多个输入对话框。
	</p>
	</div>
	""")

	                    gr.Markdown("#### Step 2: 🔍 Choose Analysis")
	                    analyze_btn = gr.Button("🔍 Analyze Image | 分析图片", variant="primary", size="lg")

	                with gr.Column(scale=6):
	                    gr.Markdown("#### Step 3: 📝 Results")
	                    with gr.Tabs():
	                        with gr.TabItem("📝 Description | 图片描述"):
	                            gr.HTML("""
	<div style="padding: 6px 10px; background: rgba(34, 197, 94, 0.1); border-radius: 4px; margin-bottom: 8px;">
	<p style="margin: 0; color: #22c55e; font-size: 11px;">
	🤖 Model: BLIP (0.4B) | AI Image Captioning | ✅ Hackathon Compliant
	</p>
	</div>
	""")
	                            desc_output = gr.Textbox(label="Image Description | 图片描述", lines=8, interactive=False)
	                            download_desc = gr.Button("⬇️ Download Description | 下载描述", size="sm")
	                            download_desc_file = gr.File(label="image_description.txt")

	                        with gr.TabItem("🔍 OCR | 文字识别"):
	                            gr.HTML("""
	<div style="padding: 6px 10px; background: rgba(234, 179, 8, 0.1); border-radius: 4px; margin-bottom: 8px;">
	<p style="margin: 0; color: #eab308; font-size: 11px;">
	🔧 Tool: EasyOCR | Text extraction from images
	</p>
	</div>
	""")
	                            ocr_output = gr.Textbox(label="OCR Results | 识别结果", lines=8, interactive=False)
	                            download_ocr = gr.Button("⬇️ Download OCR Result | 下载识别结果", size="sm")
	                            download_ocr_file = gr.File(label="ocr_result.txt")

	            clear_img_btn = gr.Button("🗑️ Clear | 清除", variant="stop", size="sm")

	    gr.HTML("""
	<div style="text-align: center; margin-top: 12px; padding: 8px; color: #64748b; font-size: 11px;">
	<p style="margin: 2px 0;">📧 Built for Build Small Hackathon | 为黑客松而建</p>
	<p style="margin: 2px 0;">⏱️ Deadline: June 15, 2026 | 截止日期：2026年6月15日</p>
	<p style="margin: 2px 0;">© 2026 Binge-666 | <a href="https://github.com/Binge-666" target="_blank" style="color: #6366f1;">github.com/Binge-666</a></p>
	</div>
	""")

	    # ==================== Event bindings ====================
	    # Document analysis
	    submit_btn.click(
	        fn=extract_text_from_file,
	        inputs=[file_input],
	        outputs=[output_text, summary_text, file_info]
	    )

	    summary_btn.click(
	        fn=generate_ai_summary,
	        inputs=[],
	        outputs=[summary_text]
	    )

	    rewrite_btn.click(
	        fn=rewrite_document,
	        inputs=[custom_prompt],
	        outputs=[rewrite_output]
	    )

	    qa_index_btn.click(
	        fn=build_qa_index,
	        inputs=[],
	        outputs=[qa_index_info]
	    )

	    qa_btn.click(
	        fn=answer_question,
	        inputs=[qa_question],
	        outputs=[qa_answer, qa_relevance]
	    )

	    # The 12 outputs line up one-to-one with the tuple returned by clear_doc().
	    clear_doc_btn.click(
	        fn=clear_doc,
	        inputs=[],
	        outputs=[file_input, file_info, output_text, summary_text, custom_prompt, qa_index_info, qa_question, qa_answer, rewrite_output, download_content_file, download_summary_file, download_rewrite_file]
	    )

	    # Image analysis: one click runs both captioning and OCR.
	    analyze_btn.click(
	        fn=lambda img: [describe_image(img), ocr_image(img)],
	        inputs=[image_input],
	        outputs=[desc_output, ocr_output]
	    )

	    clear_img_btn.click(
	        fn=clear_img,
	        inputs=[],
	        outputs=[image_input, desc_output, ocr_output, download_desc_file, download_ocr_file]
	    )

	    # Downloads: each button writes the matching textbox to a temp file.
	    download_desc.click(
	        fn=download_text,
	        inputs=[desc_output],
	        outputs=[download_desc_file]
	    )

	    download_ocr.click(
	        fn=download_text,
	        inputs=[ocr_output],
	        outputs=[download_ocr_file]
	    )

	    download_content.click(
	        fn=download_text,
	        inputs=[output_text],
	        outputs=[download_content_file]
	    )

	    download_summary.click(
	        fn=download_text,
	        inputs=[summary_text],
	        outputs=[download_summary_file]
	    )

	    download_rewrite.click(
	        fn=download_text,
	        inputs=[rewrite_output],
	        outputs=[download_rewrite_file]
	    )

	if __name__ == "__main__":
	    # Base theme with indigo as the primary hue and slate elsewhere.
	    app_theme = gr.themes.Base(
	        primary_hue="indigo",
	        secondary_hue="slate",
	        neutral_hue="slate",
	    )
	    # Listen on all interfaces at port 7860, without a public share link.
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        share=False,
	        theme=app_theme,
	    )