Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,9 +23,49 @@ logger = logging.getLogger(__name__)
|
|
| 23 |
# تنظیم API Key
|
| 24 |
gemini_api_key = os.environ.get('GEMINI_API_KEY')
|
| 25 |
if not gemini_api_key:
|
| 26 |
-
raise ValueError("GOOGLE_API_KEY
|
| 27 |
genai.configure(api_key=gemini_api_key)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
def process_single_pdf(pdf_file):
|
| 30 |
pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
|
| 31 |
logger.info(f"Starting to process file: {pdf_path}")
|
|
@@ -299,6 +339,77 @@ def evaluate_quality(docs, sections):
|
|
| 299 |
|
| 300 |
llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=gemini_api_key, convert_system_message_to_human=True, temperature=0.5)
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
academic_analysis_prompt = PromptTemplate(
|
| 303 |
template="""You are a professional academic analyst. Provide a deep and structured analysis of {section}:
|
| 304 |
1. Based solely on the provided text.
|
|
@@ -368,81 +479,14 @@ def create_conversation_chain(vector_store, docs, mode, language, detail_level,
|
|
| 368 |
chain = LLMChain(llm=llm_gemini, prompt=general_qa_prompt.partial(language=language))
|
| 369 |
return chain
|
| 370 |
|
| 371 |
-
|
| 372 |
-
start_time = time.time()
|
| 373 |
-
logger.info(f"Starting processing - Mode: {mode}, Question: {query}, Language: {language}, Detail: {detail_level}, Section: {section_dropdown}")
|
| 374 |
-
|
| 375 |
-
if mode != "Standard Response" and not pdf_file:
|
| 376 |
-
return "Please upload at least one PDF file."
|
| 377 |
-
|
| 378 |
-
if mode == "Standard Response":
|
| 379 |
-
chain = create_conversation_chain(None, None, mode, language, detail_level)
|
| 380 |
-
try:
|
| 381 |
-
result = chain.invoke({"question": query})["text"]
|
| 382 |
-
return f"{result}\n\n⏱ Processing time: {time.time() - start_time:.2f} seconds"
|
| 383 |
-
except Exception as e:
|
| 384 |
-
logger.error(f"Error in standard processing: {str(e)}")
|
| 385 |
-
return f"Error: {str(e)}"
|
| 386 |
-
|
| 387 |
-
pdf_files = pdf_file if isinstance(pdf_file, list) else [pdf_file]
|
| 388 |
-
_, docs, sections, error = upload_and_process_pdf(pdf_files)
|
| 389 |
-
if error:
|
| 390 |
-
return error
|
| 391 |
-
|
| 392 |
-
target_docs = docs if section_dropdown == "Entire Document" else sections.get(section_dropdown, docs)
|
| 393 |
-
context = " ".join([doc.page_content for doc in target_docs])
|
| 394 |
-
|
| 395 |
-
vector_store = None
|
| 396 |
-
if mode in ["Academic Analysis (RAG)", "Plagiarism Check", "Quality Evaluation"]:
|
| 397 |
-
vector_store, vectordb_error = create_vector_db(target_docs)
|
| 398 |
-
if vectordb_error:
|
| 399 |
-
return vectordb_error
|
| 400 |
-
|
| 401 |
-
chain = create_conversation_chain(vector_store, target_docs, mode, language, detail_level, section_dropdown)
|
| 402 |
-
try:
|
| 403 |
-
if mode == "Auto Summary":
|
| 404 |
-
time.sleep(2)
|
| 405 |
-
result = chain.invoke({"context": context[:5000]})["text"]
|
| 406 |
-
elif mode == "Plagiarism Check":
|
| 407 |
-
plagiarism_result = check_plagiarism(context)
|
| 408 |
-
result = plagiarism_result
|
| 409 |
-
elif mode == "Quality Evaluation":
|
| 410 |
-
score, explanation, suggestions, auto_fix = evaluate_quality(target_docs, sections)
|
| 411 |
-
time.sleep(2)
|
| 412 |
-
result = chain.invoke({"context": context[:5000], "score": score, "explanation": explanation, "suggestions": suggestions})["text"] + auto_fix
|
| 413 |
-
else:
|
| 414 |
-
result = chain.invoke({"question": query, "chat_history": []})["answer"]
|
| 415 |
-
|
| 416 |
-
if mode not in ["Plagiarism Check", "Quality Evaluation"]:
|
| 417 |
-
resources = suggest_resources(context)
|
| 418 |
-
result += "\n\n**Suggested Resources:**\n" + "\n".join(resources)
|
| 419 |
-
|
| 420 |
-
return f"{result}\n\n⏱ Processing time: {time.time() - start_time:.2f} seconds"
|
| 421 |
-
except Exception as e:
|
| 422 |
-
logger.error(f"Error in processing: {str(e)}")
|
| 423 |
-
if "429" in str(e):
|
| 424 |
-
return "Error: Rate limit exceeded for Gemini API. Please wait a few minutes and try again."
|
| 425 |
-
return f"Error: {str(e)}"
|
| 426 |
-
|
| 427 |
-
# CSS برای چپچین (LTR) و طراحی ساده با Light Mode
|
| 428 |
-
css = """
|
| 429 |
-
body {background-color: #f0f4f8; font-family: 'Arial', sans-serif; color: #2c3e50; transition: all 0.3s ease;}
|
| 430 |
-
.gr-button {background-color: #4CAF50; color: white; border: none; padding: 12px 25px; border-radius: 5px; margin-right: 15px; width: auto; display: inline-block; transition: all 0.3s ease;}
|
| 431 |
-
.gr-button:hover {background-color: #45a049;}
|
| 432 |
-
.gr-textbox, .gr-dropdown, .gr-radio, .gr-file {background-color: #ffffff; border: 1px solid #ddd; border-radius: 5px; padding: 12px; margin-right: 15px; width: 95%; box-sizing: border-box; font-size: 16px; color: #2c3e50; transition: all 0.3s ease;}
|
| 433 |
-
.gr-row {display: flex; justify-content: space-between; margin-bottom: 15px;}
|
| 434 |
-
.gr-column {padding: 15px;}
|
| 435 |
-
h1, h2, h3 {color: #2c3e50; text-align: left; margin-bottom: 20px; font-size: 24px; transition: all 0.3s ease;}
|
| 436 |
-
.markdown {color: #2c3e50; margin-bottom: 15px; font-size: 16px; transition: all 0.3s ease;}
|
| 437 |
-
/* نمایش بهتر کلمات انگلیسی (بدون نیاز به راستچین) */
|
| 438 |
-
.english {display: inline; font-family: 'Arial', sans-serif; background: #e0e0e0; padding: 2px 4px; border-radius: 3px; box-shadow: 0 1px 2px rgba(0,0,0,0.1);}
|
| 439 |
-
"""
|
| 440 |
-
|
| 441 |
with gr.Blocks(css=css, title="Professional Thesis Analyzer with Gemini") as iface:
|
| 442 |
-
gr.
|
| 443 |
-
|
| 444 |
with gr.Row():
|
| 445 |
with gr.Column():
|
|
|
|
|
|
|
| 446 |
pdf_input = gr.File(file_types=['.pdf'], label="Upload <span class='english'>PDF</span> File", file_count="multiple")
|
| 447 |
mode = gr.Radio(
|
| 448 |
["Academic Analysis (RAG)", "Auto Summary", "Plagiarism Check", "Quality Evaluation", "Standard Response"],
|
|
@@ -451,15 +495,22 @@ with gr.Blocks(css=css, title="Professional Thesis Analyzer with Gemini") as ifa
|
|
| 451 |
)
|
| 452 |
query = gr.Textbox(lines=3, placeholder="Enter your question or request here...", label="Question or Request")
|
| 453 |
section = gr.Dropdown(["Entire Document", "Introduction", "Methodology", "Results", "Discussion", "References"], label="Target Section", value="Entire Document")
|
| 454 |
-
|
| 455 |
detail = gr.Dropdown(["Brief", "Detailed"], label="Detail Level", value="Detailed")
|
| 456 |
submit = gr.Button("Submit")
|
| 457 |
with gr.Column():
|
| 458 |
output = gr.Textbox(label="Processing Result", lines=10, placeholder="Results will be displayed here...")
|
| 459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
submit.click(
|
| 461 |
fn=academic_chatbot,
|
| 462 |
-
inputs=[pdf_input, mode, query,
|
| 463 |
outputs=output
|
| 464 |
)
|
| 465 |
|
|
|
|
| 23 |
# Configure the Gemini API key (read from the Space's environment).
gemini_api_key = os.environ.get('GEMINI_API_KEY')
if not gemini_api_key:
    # Fixed: the message previously said GOOGLE_API_KEY although the code
    # reads GEMINI_API_KEY, which directed users to set the wrong variable.
    raise ValueError("GEMINI_API_KEY not found. Please set it in the Space settings.")
genai.configure(api_key=gemini_api_key)

# UI strings for each supported interface language.
# NOTE(review): the Farsi "modes"/"sections" values differ from the English
# strings that academic_chatbot compares against — switching the UI to Farsi
# will bypass those comparisons; confirm and map localized values back to
# canonical keys before relying on the Farsi UI.
TEXTS = {
    "English": {
        "title": "Professional Thesis Analyzer with Gemini",
        "description": "Upload your <span class='english'>PDF</span> file and use the analysis, summary, plagiarism check, or quality evaluation features.",
        "upload_label": "Upload <span class='english'>PDF</span> File",
        "mode_label": "Processing Mode",
        "modes": ["Academic Analysis (RAG)", "Auto Summary", "Plagiarism Check", "Quality Evaluation", "Standard Response"],
        "query_label": "Question or Request",
        "query_placeholder": "Enter your question or request here...",
        "section_label": "Target Section",
        "sections": ["Entire Document", "Introduction", "Methodology", "Results", "Discussion", "References"],
        "language_label": "Response Language",
        "languages": ["English", "Farsi"],
        "detail_label": "Detail Level",
        "details": ["Brief", "Detailed"],
        "submit": "Submit",
        "output_label": "Processing Result",
        "output_placeholder": "Results will be displayed here..."
    },
    "Farsi": {
        "title": "تحلیلگر حرفهای پایاننامه با Gemini",
        "description": "فایل <span class='english'>PDF</span> خود را آپلود کنید و از قابلیتهای تحلیل، خلاصه، چک سرقت ادبی یا ارزیابی کیفیت استفاده کنید.",
        "upload_label": "آپلود فایل <span class='english'>PDF</span>",
        "mode_label": "حالت پردازش",
        "modes": ["تحلیل آکادمیک (RAG)", "خلاصه خودکار", "چک سرقت ادبی", "ارزیابی کیفیت", "پاسخ استاندارد"],
        "query_label": "سوال یا درخواست",
        "query_placeholder": "سوال یا درخواست خود را اینجا بنویسید...",
        "section_label": "بخش موردنظر",
        "sections": ["کل سند", "مقدمه", "روششناسی", "نتایج", "بحث", "منابع"],
        "language_label": "زبان پاسخ",
        "languages": ["انگلیسی", "فارسی"],
        "detail_label": "سطح جزئیات",
        "details": ["خلاصه", "جامع"],
        "submit": "ارسال",
        "output_label": "نتیجه پردازش",
        "output_placeholder": "نتایج اینجا نمایش داده میشود..."
    }
}
|
| 68 |
+
|
| 69 |
def process_single_pdf(pdf_file):
|
| 70 |
pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
|
| 71 |
logger.info(f"Starting to process file: {pdf_path}")
|
|
|
|
| 339 |
|
| 340 |
llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=gemini_api_key, convert_system_message_to_human=True, temperature=0.5)
|
| 341 |
|
| 342 |
+
def update_interface(language):
    """Return component updates that relocalize the UI to *language*.

    Parameters
    ----------
    language : str
        Key into TEXTS ("English" or "Farsi").

    Returns
    -------
    tuple
        One ``gr.update(...)`` per wired output component, in the order used
        by ``language_dropdown.change()``: title, description, pdf_input,
        mode, query, section, language_dropdown, detail, submit, output.
    """
    texts = TEXTS[language]
    return (
        # Fixed: "#{title}" lacked the space Markdown needs for a heading.
        # Also switched from per-component Foo.update(...) (removed in
        # Gradio 4) to the portable gr.update(...).
        gr.update(value=f"# {texts['title']}"),
        gr.update(value=texts['description']),
        gr.update(label=texts['upload_label']),
        gr.update(label=texts['mode_label'], choices=texts['modes']),
        gr.update(label=texts['query_label'], placeholder=texts['query_placeholder']),
        gr.update(label=texts['section_label'], choices=texts['sections']),
        # Only relabel the language selector: replacing its choices with the
        # localized names would make its selected value no longer a valid
        # TEXTS key, breaking the next change event.
        gr.update(label=texts['language_label']),
        gr.update(label=texts['detail_label'], choices=texts['details']),
        gr.update(value=texts['submit']),
        gr.update(label=texts['output_label'], placeholder=texts['output_placeholder'])
    )
|
| 356 |
+
|
| 357 |
+
def academic_chatbot(pdf_file, mode, query, language, detail_level, section_dropdown):
    """Main Gradio handler: route the request to the selected processing mode.

    Parameters mirror the UI widgets: the uploaded PDF file(s), the
    processing mode, the user's question, the response language, the detail
    level, and the target document section. Returns the result text with the
    elapsed time appended, or a human-readable error string.
    """
    start_time = time.time()
    # Lazy %-style args: avoids building the message when INFO is disabled.
    logger.info(
        "Starting processing - Mode: %s, Question: %s, Language: %s, Detail: %s, Section: %s",
        mode, query, language, detail_level, section_dropdown,
    )

    # Every mode except "Standard Response" needs at least one uploaded PDF.
    if mode != "Standard Response" and not pdf_file:
        return "Please upload at least one PDF file."

    if mode == "Standard Response":
        chain = create_conversation_chain(None, None, mode, language, detail_level)
        try:
            result = chain.invoke({"question": query})["text"]
            return f"{result}\n\n⏱ Processing time: {time.time() - start_time:.2f} seconds"
        except Exception as e:
            # logger.exception keeps the traceback, unlike logger.error(str(e)).
            logger.exception("Error in standard processing")
            return f"Error: {str(e)}"

    # Normalize to a list so single and multiple uploads share one code path.
    pdf_files = pdf_file if isinstance(pdf_file, list) else [pdf_file]
    _, docs, sections, error = upload_and_process_pdf(pdf_files)
    if error:
        return error

    # NOTE(review): these comparisons use the English UI strings; if the
    # interface is switched to Farsi the localized mode/section values will
    # not match any branch — confirm and map them to canonical keys.
    target_docs = docs if section_dropdown == "Entire Document" else sections.get(section_dropdown, docs)
    context = " ".join(doc.page_content for doc in target_docs)

    vector_store = None
    if mode in ["Academic Analysis (RAG)", "Plagiarism Check", "Quality Evaluation"]:
        vector_store, vectordb_error = create_vector_db(target_docs)
        if vectordb_error:
            return vectordb_error

    chain = create_conversation_chain(vector_store, target_docs, mode, language, detail_level, section_dropdown)
    try:
        if mode == "Auto Summary":
            time.sleep(2)  # crude pacing to stay under the Gemini rate limit
            result = chain.invoke({"context": context[:5000]})["text"]
        elif mode == "Plagiarism Check":
            result = check_plagiarism(context)
        elif mode == "Quality Evaluation":
            score, explanation, suggestions, auto_fix = evaluate_quality(target_docs, sections)
            time.sleep(2)  # crude pacing to stay under the Gemini rate limit
            result = chain.invoke({"context": context[:5000], "score": score, "explanation": explanation, "suggestions": suggestions})["text"] + auto_fix
        else:
            # RAG conversational chain returns under "answer", not "text".
            result = chain.invoke({"question": query, "chat_history": []})["answer"]

        if mode not in ["Plagiarism Check", "Quality Evaluation"]:
            resources = suggest_resources(context)
            result += "\n\n**Suggested Resources:**\n" + "\n".join(resources)

        return f"{result}\n\n⏱ Processing time: {time.time() - start_time:.2f} seconds"
    except Exception as e:
        logger.exception("Error in processing")
        if "429" in str(e):
            return "Error: Rate limit exceeded for Gemini API. Please wait a few minutes and try again."
        return f"Error: {str(e)}"
|
| 412 |
+
|
| 413 |
academic_analysis_prompt = PromptTemplate(
|
| 414 |
template="""You are a professional academic analyst. Provide a deep and structured analysis of {section}:
|
| 415 |
1. Based solely on the provided text.
|
|
|
|
| 479 |
chain = LLMChain(llm=llm_gemini, prompt=general_qa_prompt.partial(language=language))
|
| 480 |
return chain
|
| 481 |
|
| 482 |
+
# رابط کاربری با قابلیت تغییر زبان
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
with gr.Blocks(css=css, title="Professional Thesis Analyzer with Gemini") as iface:
|
| 484 |
+
language = gr.State(value="English") # حالت پیشفرض انگلیسی
|
| 485 |
+
|
| 486 |
with gr.Row():
|
| 487 |
with gr.Column():
|
| 488 |
+
title = gr.Markdown("# Professional Thesis Analyzer with Gemini")
|
| 489 |
+
description = gr.Markdown("Upload your <span class='english'>PDF</span> file and use the analysis, summary, plagiarism check, or quality evaluation features.")
|
| 490 |
pdf_input = gr.File(file_types=['.pdf'], label="Upload <span class='english'>PDF</span> File", file_count="multiple")
|
| 491 |
mode = gr.Radio(
|
| 492 |
["Academic Analysis (RAG)", "Auto Summary", "Plagiarism Check", "Quality Evaluation", "Standard Response"],
|
|
|
|
| 495 |
)
|
| 496 |
query = gr.Textbox(lines=3, placeholder="Enter your question or request here...", label="Question or Request")
|
| 497 |
section = gr.Dropdown(["Entire Document", "Introduction", "Methodology", "Results", "Discussion", "References"], label="Target Section", value="Entire Document")
|
| 498 |
+
language_dropdown = gr.Dropdown(["English", "Farsi"], label="Response Language", value="English", interactive=True)
|
| 499 |
detail = gr.Dropdown(["Brief", "Detailed"], label="Detail Level", value="Detailed")
|
| 500 |
submit = gr.Button("Submit")
|
| 501 |
with gr.Column():
|
| 502 |
output = gr.Textbox(label="Processing Result", lines=10, placeholder="Results will be displayed here...")
|
| 503 |
+
|
| 504 |
+
# بهروزرسانی رابط کاربری بر اساس زبان
|
| 505 |
+
language_dropdown.change(
|
| 506 |
+
fn=update_interface,
|
| 507 |
+
inputs=language_dropdown,
|
| 508 |
+
outputs=[title, description, pdf_input, mode, query, section, language_dropdown, detail, submit, output]
|
| 509 |
+
)
|
| 510 |
+
|
| 511 |
submit.click(
|
| 512 |
fn=academic_chatbot,
|
| 513 |
+
inputs=[pdf_input, mode, query, language_dropdown, detail, section],
|
| 514 |
outputs=output
|
| 515 |
)
|
| 516 |
|