Update app.py

app.py
CHANGED
@@ -28,40 +28,40 @@ genai.configure(api_key=gemini_api_key)

 def process_single_pdf(pdf_file):
     pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
+    logger.info(f"Starting to process file: {pdf_path}")
     if not os.path.isfile(pdf_path):
+        logger.error(f"File {pdf_path} does not exist.")
         return None, None
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
     loader = PyPDFLoader(pdf_path)
     try:
         pages = loader.load_and_split()
         docs = text_splitter.split_documents(pages)
+        sections = {"Introduction": [], "Methodology": [], "Results": [], "Discussion": [], "References": []}
         for doc in docs:
             text = doc.page_content
+            if re.search(r"Introduction|مقدمه", text, re.I):
+                sections["Introduction"].append(doc)
+            elif re.search(r"Methodology|روش", text, re.I):
+                sections["Methodology"].append(doc)
+            elif re.search(r"Results|نتایج", text, re.I):
+                sections["Results"].append(doc)
+            elif re.search(r"Discussion|بحث", text, re.I):
+                sections["Discussion"].append(doc)
+            elif re.search(r"References|Bibliography|منابع", text, re.I):
+                sections["References"].append(doc)
+        logger.info(f"Processed file: {pdf_path} - Number of chunks: {len(docs)}")
         return docs, sections
     except Exception as e:
+        logger.error(f"Error processing {pdf_path}: {str(e)}")
         return None, None

 def upload_and_process_pdf(pdf_files):
     if not pdf_files:
+        return None, None, None, "Please upload at least one PDF file."
+    logger.info(f"Number of input files: {len(pdf_files)}")
     all_docs = []
+    all_sections = {"Introduction": [], "Methodology": [], "Results": [], "Discussion": [], "References": []}
     with concurrent.futures.ThreadPoolExecutor() as executor:
         future_to_file = {executor.submit(process_single_pdf, pdf_file): pdf_file for pdf_file in pdf_files}
         for future in concurrent.futures.as_completed(future_to_file):
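A note on this hunk: the routing regexes match anywhere in a chunk, so a chunk that merely mentions "Introduction" in passing lands in the first matching bucket. A minimal standalone sketch for testing the patterns (the sample text is hypothetical; the patterns are copied from the hunk):

import re

# First matching pattern wins, mirroring the if/elif chain in process_single_pdf.
SECTION_PATTERNS = {
    "Introduction": r"Introduction|مقدمه",
    "Methodology": r"Methodology|روش",
    "Results": r"Results|نتایج",
    "Discussion": r"Discussion|بحث",
    "References": r"References|Bibliography|منابع",
}

def classify_chunk(text):
    for name, pattern in SECTION_PATTERNS.items():
        if re.search(pattern, text, re.I):
            return name
    return None

print(classify_chunk("1. Introduction\nThis thesis examines ..."))  # Introduction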
@@ -72,47 +72,47 @@ def upload_and_process_pdf(pdf_files):
                 all_sections[key].extend(sections[key])
             else:
                 pdf_file = future_to_file[future]
+                return None, None, None, f"Error processing file: {pdf_file.name if hasattr(pdf_file, 'name') else pdf_file}"
+    logger.info(f"Total number of processed documents: {len(all_docs)}")
     return None, all_docs, all_sections, None

 def create_vector_db(docs):
     if not docs:
+        return None, "No content was processed."
     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=gemini_api_key)
     try:
+        logger.info("Starting to build FAISS...")
         vector_store = FAISS.from_documents(docs, embedding=embeddings)
+        logger.info(f"Vector database built with {len(docs)} documents.")
         return vector_store, None
     except Exception as e:
+        logger.error(f"Error creating vector database: {str(e)}")
+        return None, f"Error in vector processing: {str(e)}"

 def extract_keywords(text):
     try:
+        prompt = f"Extract 5 main keywords from the following text that represent the main topic:\n**Text:**\n{text[:2000]}\n**Keywords:**"
         model = genai.GenerativeModel('gemini-pro')
         response = model.generate_content(prompt)
+        keywords = response.text.split("**Keywords:**")[-1].strip().split(", ")
+        logger.info(f"Extracted keywords: {keywords}")
         time.sleep(1)
         return keywords[:5]
     except Exception as e:
+        logger.error(f"Error extracting keywords: {str(e)}")
+        return ["research", "results", "method", "analysis", "topic"]

 def translate_to_english(text):
     try:
+        prompt = f"Translate the following text to English:\n**Text:**\n{text[:1000]}\n**Translation:**"
         model = genai.GenerativeModel('gemini-pro')
         response = model.generate_content(prompt)
+        translated_text = response.text.split("**Translation:**")[-1].strip()
+        logger.info(f"Translated text: {translated_text[:50]}...")
         time.sleep(1)
         return translated_text
     except Exception as e:
+        logger.error(f"Error in translation: {str(e)}")
         return text

 def check_plagiarism(text):
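The new extract_keywords parses the model reply by splitting on the **Keywords:** marker; that parsing can be exercised offline with a canned reply (the reply string below is hypothetical, no API call involved):

# A canned Gemini-style reply to test the parsing logic in extract_keywords.
fake_reply = "**Keywords:** neural networks, optimization, regression, datasets, evaluation"
keywords = fake_reply.split("**Keywords:**")[-1].strip().split(", ")
print(keywords[:5])  # ['neural networks', 'optimization', 'regression', 'datasets', 'evaluation']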
@@ -121,72 +121,72 @@ def check_plagiarism(text):
         translated_keywords = translate_to_english(" ".join(keywords))
         query = translated_keywords

+        # Search in Google Scholar
         url_scholar = f"https://scholar.google.com/scholar?q={query}"
         response_scholar = requests.get(url_scholar, headers={"User-Agent": "Mozilla/5.0"})
         soup_scholar = BeautifulSoup(response_scholar.text, 'html.parser')
         results_scholar = []
         for item in soup_scholar.find_all('h3', class_='gs_rt', limit=5):
             title = item.get_text().strip()
+            link = item.find('a')['href'] if item.find('a') else "No link available"
             author_info = item.find_next('div', class_='gs_a')
             if author_info:
                 author_year = author_info.get_text().strip()
                 author_match = re.search(r"(.+?)(?: - (\d{4}))?", author_year)
+                author = author_match.group(1) if author_match.group(1) else "Unknown Author"
+                year = author_match.group(2) if author_match.group(2) else "Unknown"
             else:
+                author, year = "Unknown Author", "Unknown"
             results_scholar.append((title, link, author, year))
+        logger.info(f"Google Scholar results: {results_scholar}")

+        # Search in arXiv
         url_arxiv = f"https://arxiv.org/search/?query={query}&searchtype=all&source=header"
         response_arxiv = requests.get(url_arxiv, headers={"User-Agent": "Mozilla/5.0"})
         soup_arxiv = BeautifulSoup(response_arxiv.text, 'html.parser')
         results_arxiv = []
         for item in soup_arxiv.find_all('p', class_='title', limit=5):
             title = item.get_text().strip()
+            link = item.find_previous('a', class_='arxiv-url')['href'] if item.find_previous('a', class_='arxiv-url') else "No link available"
             author_info = item.find_next('p', class_='authors')
             year_info = item.find_next('p', class_='is-size-7')
+            author = author_info.get_text().replace("Authors:", "").strip() if author_info else "Unknown Author"
+            year = re.search(r"\d{4}", year_info.get_text() if year_info else "").group(0) if re.search(r"\d{4}", year_info.get_text() if year_info else "") else "Unknown"
             results_arxiv.append((title, link, author, year))
+        logger.info(f"arXiv results: {results_arxiv}")

         all_results = results_scholar + results_arxiv
         if not all_results:
+            return "No significant similarity found.\n**Explanation:** Your text was compared with scientific resources in Google Scholar and arXiv, and no meaningful matches were found.\n**Status:** Plagiarism likelihood is very low."

         max_similarity = 0
         matched_texts = []
         for title, link, author, year in all_results:
             similarity = SequenceMatcher(None, text[:1000], title).ratio()
+            if similarity > 0.1:  # Minimum 10% similarity for display
+                matched_texts.append(f"**Title:** {title}\n**Author:** {author}\n**Year:** {year}\n**Link:** {link}\n**Note:** This resource may have some similarity with your text.")
                 if similarity > max_similarity:
                     max_similarity = similarity

         time.sleep(1)
         similarity_percent = max_similarity * 100
         if not matched_texts:
+            return "No significant similarity found.\n**Explanation:** Your text was compared with scientific resources and no matches were found.\n**Status:** Plagiarism likelihood is very low."

         if similarity_percent > 20:
+            status = "Plagiarism is likely. Please review similar resources and add appropriate citations."
         elif similarity_percent > 10:
+            status = "Low similarity. Possibly coincidental, but reviewing resources is recommended."
         else:
+            status = "Very low similarity. Plagiarism likelihood is negligible."

+        output = (f"**Similarity Percentage:** {similarity_percent:.2f}%\n"
+                  f"**Status:** {status}\n"
+                  f"**Similar Resources Found:**\n" + "\n--------------------\n".join(matched_texts[:3]))
         return output
     except Exception as e:
+        logger.error(f"Error in plagiarism check: {str(e)}")
+        return f"Error in plagiarism check: {str(e)}\nPlease try again or contact support."

 def suggest_resources(text):
     try:
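One caveat worth flagging on the thresholds in this hunk: the score compares the first 1000 characters of the document against a short result title, and SequenceMatcher ratios are bounded by the shorter string's share of the combined length, so even an exact title match stays far below the 20% tier. A standalone check (strings hypothetical):

from difflib import SequenceMatcher

# An exact title embedded in a ~1000-character prefix still scores only a few
# percent, because ratio() divides matched characters by the combined length.
text = "Deep Learning for Thesis Analysis. " * 30
title = "Deep Learning for Thesis Analysis"
print(SequenceMatcher(None, text[:1000], title).ratio())  # roughly 0.06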
@@ -196,20 +196,20 @@ def suggest_resources(text):
         url_scholar = f"https://scholar.google.com/scholar?q={query}"
         response_scholar = requests.get(url_scholar, headers={"User-Agent": "Mozilla/5.0"})
         soup_scholar = BeautifulSoup(response_scholar.text, 'html.parser')
+        papers_scholar = [f"{h.get_text().strip()} (Link: {h.find('a')['href']})" for h in soup_scholar.find_all('h3')[:3] if h.find('a')]

         url_arxiv = f"https://arxiv.org/search/?query={query}&searchtype=all&source=header"
         response_arxiv = requests.get(url_arxiv, headers={"User-Agent": "Mozilla/5.0"})
         soup_arxiv = BeautifulSoup(response_arxiv.text, 'html.parser')
+        papers_arxiv = [f"{paper.get_text().strip()} (Link: {paper.find_previous('a', class_='arxiv-url')['href']})"
                         for paper in soup_arxiv.find_all('p', class_='title')[:2] if paper.find_previous('a', class_='arxiv-url')]

         resources = papers_scholar + papers_arxiv if papers_scholar else papers_arxiv
         time.sleep(1)
+        return resources if resources else ["No resources found."]
     except Exception as e:
+        logger.error(f"Error in suggesting resources: {str(e)}")
+        return ["Error in resource search"]

 def evaluate_quality(docs, sections):
     text = " ".join([doc.page_content for doc in docs])
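The Scholar scraping in suggest_resources assumes each result is an h3 element wrapping a link; the comprehension can be verified against canned HTML without touching the live site (the markup below is hypothetical):

from bs4 import BeautifulSoup

# Canned HTML imitating one Scholar result, to test the list comprehension
# from this hunk offline.
html = '<h3 class="gs_rt"><a href="https://example.org/paper">A Sample Paper</a></h3>'
soup = BeautifulSoup(html, "html.parser")
papers = [f"{h.get_text().strip()} (Link: {h.find('a')['href']})"
          for h in soup.find_all('h3')[:3] if h.find('a')]
print(papers)  # ['A Sample Paper (Link: https://example.org/paper)']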
@@ -218,67 +218,67 @@ def evaluate_quality(docs, sections):
     suggestions = []
     auto_fix = ""

+    # Criterion 1: References (Quality and Quantity)
     ref_count = len(re.findall(r"\[\d+\]|[A-Za-z]+\s+\d{4}", text))
     if ref_count > 15:
         score += 30
+        explanation.append("Very strong and credible references (more than 15 citations from reputable journals).")
     elif ref_count > 10:
         score += 25
+        explanation.append("Sufficient and credible references (10-15 citations).")
     elif ref_count > 0:
         score += 15
+        explanation.append("Existing but limited references (fewer than 10 citations).")
+        suggestions.append("Add at least 5 sources from reputable journals (like IEEE, Springer, or Elsevier) with precise author and year citations.")
     else:
+        explanation.append("No sufficient references found.")
+        suggestions.append("Complete the references section with at least 10 citations from peer-reviewed articles.")
+        auto_fix += "\n**Auto-fix - Sample Citation:**\n[1] Smith, J. (2020). 'Advanced Research Methods', Journal of Science, 15(3), 123-145."

+    # Criterion 2: Coherence, Writing, and Scientific Weight
     words = text.split()
     word_freq = Counter(words).most_common(10)
+    keywords = [word[0] for word in word_freq[:3]] if word_freq else ["research", "results", "method"]
     scientific_terms = sum(1 for word in words if word.lower() in ["analysis", "data", "method", "result", "hypothesis", "theory"])
     if word_freq and word_freq[0][1] > len(words) * 0.02 and scientific_terms > len(words) * 0.05:
         score += 25
+        explanation.append("Excellent textual coherence and high scientific weight (focus on topic and use of scientific terms).")
     else:
+        explanation.append("Poor textual coherence or low scientific weight (topic dispersion or lack of scientific terms).")
+        suggestions.append(f"Use keywords like {', '.join(keywords)} and scientific terms (like 'statistical analysis' or 'hypothesis') more frequently and make sentences smoother.")
         try:
+            prompt = f"Rewrite the following paragraph to be more scientific, smoother, and with higher scientific weight:\n**Text:**\n{text[:500]}\n**Rewritten:**"
             model = genai.GenerativeModel('gemini-pro')
             response = model.generate_content(prompt)
+            auto_fix += f"\n**Auto-fix - Rewritten Paragraph:**\n{response.text.split('**Rewritten:**')[-1].strip()}"
             time.sleep(1)
         except Exception as e:
+            logger.error(f"Error in rewriting: {str(e)}")
+            auto_fix += "\n**Auto-fix - Rewritten:**\nError in rewriting, please manually revise the text."

+    # Criterion 3: Tables/Figures
+    if re.search(r"Table|Figure|جدول|شکل", text, re.I):
         score += 20
+        explanation.append("Effective use of tables or figures to support findings.")
     else:
+        explanation.append("No use of tables or figures.")
+        suggestions.append("Add a table for data and a figure (like a bar chart or line graph) for trends to make findings more comprehensible.")

+    # Criterion 4: Depth of Analysis and Scientific Weight
+    analysis_text = " ".join([doc.page_content for doc in sections.get("Results", []) + sections.get("Discussion", [])])
+    stats_found = bool(re.search(r"Statistic|Regression|ANOVA|T-test|Correlation|آمار", analysis_text, re.I))
     if len(analysis_text.split()) > 1500 and stats_found:
         score += 25
+        explanation.append("Very high depth of analysis (long and statistical with strong scientific weight).")
     elif len(analysis_text.split()) > 1000:
         score += 15
+        explanation.append("Acceptable depth of analysis (long but lacking sufficient statistical analysis).")
+        suggestions.append("Add advanced statistical analysis (like regression, ANOVA, or T-test) to strengthen findings.")
     else:
+        explanation.append("Poor depth of analysis (short and without statistical analysis).")
+        suggestions.append("Expand the Results/Discussion section to at least 1500 words with comprehensive statistical analysis.")

+    # Measure scientific weight by global comparison
     try:
         keywords = extract_keywords(text)
         translated_text = translate_to_english(" ".join(keywords))
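A quick check on Criterion 1: the citation regex counts both bracketed numbers and any word followed by a four-digit year, so ordinary prose like "around 2020" inflates ref_count. Sample run (the snippet is hypothetical):

import re

# The citation-counting pattern from this hunk; note the second alternative
# also matches non-citation phrases such as "around 2020".
sample = "As shown in [1] and [2], Smith 2019 reported gains; growth slowed around 2020."
print(len(re.findall(r"\[\d+\]|[A-Za-z]+\s+\d{4}", sample)))  # 4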
@@ -288,11 +288,11 @@ def evaluate_quality(docs, sections):
         soup_arxiv = BeautifulSoup(response_arxiv.text, 'html.parser')
         arxiv_titles = [paper.get_text().strip() for paper in soup_arxiv.find_all('p', class_='title')[:3]]
         if arxiv_titles:
+            suggestions.append(f"To increase scientific weight, refer to similar arXiv papers like '{arxiv_titles[0]}' and compare your findings with them.")
         time.sleep(1)
     except Exception as e:
+        logger.error(f"Error in scientific weight assessment: {str(e)}")
+        suggestions.append("Global comparison with scientific resources failed due to an error.")

     score = max(min(score, 100), 0)
     return score, "; ".join(explanation), "; ".join(suggestions), auto_fix
@@ -300,69 +300,69 @@ def evaluate_quality(docs, sections):
 llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=gemini_api_key, convert_system_message_to_human=True, temperature=0.5)

 academic_analysis_prompt = PromptTemplate(
+    template="""You are a professional academic analyst. Provide a deep and structured analysis of {section}:
+    1. Based solely on the provided text.
+    2. Including a review of the topic, methods, findings, and critique (if applicable).
+    3. In {language} with {detail_level} detail.
+    **Related Text:**
     {context}
+    **User Question:** {question}
+    **Academic Analysis:**""",
     input_variables=["section", "context", "question", "language", "detail_level"]
 )

 summary_prompt = PromptTemplate(
+    template="""You are an expert in academic writing. Produce a structured scientific summary (200-300 words) of the following text in {language} that includes:
+    1. Research objective
+    2. Methodology
+    3. Main findings
+    4. Conclusion
+    **Text:**
     {context}
+    **Summary:**""",
     input_variables=["context", "language"]
 )

 general_qa_prompt = PromptTemplate(
+    template="""You are an intelligent assistant. Answer the user's question in {language}:
+    **User Question:** {question}
+    Answer:""",
     input_variables=["question", "language"]
 )

 plagiarism_prompt = PromptTemplate(
+    template="""Report the percentage of similarity of the following text with English resources:
+    **Text:**
     {context}
+    **Result:** {similarity}""",
     input_variables=["context", "similarity"]
 )

 quality_prompt = PromptTemplate(
+    template="""You are a professional academic evaluator. Evaluate the scientific quality of the following text:
+    **Text:**
     {context}
+    **Score:** {score}/100
+    **Explanations:** {explanation}
+    **Improvement Suggestions:** {suggestions}""",
     input_variables=["context", "score", "explanation", "suggestions"]
 )

 def create_conversation_chain(vector_store, docs, mode, language, detail_level, section=None):
+    if mode == "Academic Analysis (RAG)":
         retriever = vector_store.as_retriever(search_kwargs={"k": 3})
         chain = ConversationalRetrievalChain.from_llm(
             llm=llm_gemini,
             retriever=retriever,
             return_source_documents=True,
+            combine_docs_chain_kwargs={"prompt": academic_analysis_prompt.partial(language=language, detail_level=detail_level, section=section or "Entire Document")},
             verbose=True
         )
+    elif mode == "Auto Summary":
         chain = LLMChain(llm=llm_gemini, prompt=summary_prompt.partial(language=language))
+    elif mode == "Plagiarism Check":
         chain = LLMChain(llm=llm_gemini, prompt=plagiarism_prompt.partial(language=language))
+    elif mode == "Quality Evaluation":
         chain = LLMChain(llm=llm_gemini, prompt=quality_prompt.partial(language=language))
     else:
         chain = LLMChain(llm=llm_gemini, prompt=general_qa_prompt.partial(language=language))
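The prompt wiring above relies on partial variables: static fields such as language are bound once, leaving only the runtime fields for invoke. (One thing a reviewer may want to double-check: plagiarism_prompt and quality_prompt declare no {language} variable, so the .partial(language=...) calls on them only work if the installed LangChain version tolerates extra partial variables.) A minimal sketch of the pattern, assuming a LangChain version where PromptTemplate.partial is available:

from langchain.prompts import PromptTemplate

# Pre-binding "language" leaves only "question" to supply at call time,
# mirroring general_qa_prompt.partial(language=language) above.
qa = PromptTemplate(
    template="Answer the user's question in {language}:\n{question}",
    input_variables=["question", "language"],
)
qa_en = qa.partial(language="English")
print(qa_en.format(question="What is RAG?"))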
@@ -370,141 +370,92 @@ def create_conversation_chain(vector_store, docs, mode, language, detail_level,

 def academic_chatbot(pdf_file, mode, query, language, detail_level, section_dropdown):
     start_time = time.time()
+    logger.info(f"Starting processing - Mode: {mode}, Question: {query}, Language: {language}, Detail: {detail_level}, Section: {section_dropdown}")

+    if mode != "Standard Response" and not pdf_file:
+        return "Please upload at least one PDF file."

+    if mode == "Standard Response":
         chain = create_conversation_chain(None, None, mode, language, detail_level)
         try:
             result = chain.invoke({"question": query})["text"]
+            return f"{result}\n\n⏱ Processing time: {time.time() - start_time:.2f} seconds"
         except Exception as e:
+            logger.error(f"Error in standard processing: {str(e)}")
+            return f"Error: {str(e)}"

     pdf_files = pdf_file if isinstance(pdf_file, list) else [pdf_file]
     _, docs, sections, error = upload_and_process_pdf(pdf_files)
     if error:
         return error

+    target_docs = docs if section_dropdown == "Entire Document" else sections.get(section_dropdown, docs)
     context = " ".join([doc.page_content for doc in target_docs])

     vector_store = None
+    if mode in ["Academic Analysis (RAG)", "Plagiarism Check", "Quality Evaluation"]:
         vector_store, vectordb_error = create_vector_db(target_docs)
         if vectordb_error:
             return vectordb_error

     chain = create_conversation_chain(vector_store, target_docs, mode, language, detail_level, section_dropdown)
     try:
+        if mode == "Auto Summary":
             time.sleep(2)
             result = chain.invoke({"context": context[:5000]})["text"]
+        elif mode == "Plagiarism Check":
             plagiarism_result = check_plagiarism(context)
             result = plagiarism_result
+        elif mode == "Quality Evaluation":
             score, explanation, suggestions, auto_fix = evaluate_quality(target_docs, sections)
             time.sleep(2)
             result = chain.invoke({"context": context[:5000], "score": score, "explanation": explanation, "suggestions": suggestions})["text"] + auto_fix
         else:
             result = chain.invoke({"question": query, "chat_history": []})["answer"]

+        if mode not in ["Plagiarism Check", "Quality Evaluation"]:
             resources = suggest_resources(context)
+            result += "\n\n**Suggested Resources:**\n" + "\n".join(resources)

+        return f"{result}\n\n⏱ Processing time: {time.time() - start_time:.2f} seconds"
     except Exception as e:
+        logger.error(f"Error in processing: {str(e)}")
         if "429" in str(e):
+            return "Error: Rate limit exceeded for Gemini API. Please wait a few minutes and try again."
+        return f"Error: {str(e)}"

+# CSS for a left-to-right (LTR) layout and a simple Light Mode design
 css = """
+body {background-color: #f0f4f8; font-family: 'Arial', sans-serif; color: #2c3e50; transition: all 0.3s ease;}
+.gr-button {background-color: #4CAF50; color: white; border: none; padding: 12px 25px; border-radius: 5px; margin-right: 15px; width: auto; display: inline-block; transition: all 0.3s ease;}
+.gr-button:hover {background-color: #45a049;}
+.gr-textbox, .gr-dropdown, .gr-radio, .gr-file {background-color: #ffffff; border: 1px solid #ddd; border-radius: 5px; padding: 12px; margin-right: 15px; width: 95%; box-sizing: border-box; font-size: 16px; color: #2c3e50; transition: all 0.3s ease;}
+.gr-row {display: flex; justify-content: space-between; margin-bottom: 15px;}
+.gr-column {padding: 15px;}
+h1, h2, h3 {color: #2c3e50; text-align: left; margin-bottom: 20px; font-size: 24px; transition: all 0.3s ease;}
+.markdown {color: #2c3e50; margin-bottom: 15px; font-size: 16px; transition: all 0.3s ease;}
+/* Better display of English words (no right-to-left alignment needed) */
+.english {display: inline; font-family: 'Arial', sans-serif; background: #e0e0e0; padding: 2px 4px; border-radius: 3px; box-shadow: 0 1px 2px rgba(0,0,0,0.1);}
-[data-theme="dark"] {
-    --bg-light: #1a202c;
-    --bg-dark: #f0f4f8;
-    --text-light: #ffffff;
-    --text-dark: #2c3e50;
-    --input-bg: #2d3748;
-    --input-border: #4a5568;
-}
-body {background-color: var(--bg-light); font-family: 'Vazir', 'Arial', sans-serif; direction: rtl; text-align: right; color: var(--text-light); transition: all 0.3s ease;}
-.gr-button {background-color: var(--button-bg); color: white; border: none; padding: 12px 25px; border-radius: 5px; margin-right: 25px; width: auto; display: inline-block; transition: all 0.3s ease;}
-.gr-button:hover {background-color: var(--button-hover);}
-.gr-textbox, .gr-dropdown, .gr-radio, .gr-file {direction: rtl; text-align: right; background-color: var(--input-bg); border: 1px solid var(--input-border); border-radius: 5px; padding: 12px; margin-right: 20px; width: 95%; box-sizing: border-box; font-size: 16px; color: var(--text-light); transition: all 0.3s ease;}
-.gr-row {display: flex; justify-content: space-between; margin-bottom: 20px;}
-.gr-column {padding: 20px;}
-h1, h2, h3 {color: var(--text-light); text-align: center; margin-bottom: 25px; font-size: 24px; transition: all 0.3s ease;}
-.markdown {direction: rtl; text-align: right; color: var(--text-light); margin-bottom: 20px; font-size: 16px; transition: all 0.3s ease;}
-/* Better display of English words within Persian text */
-body {unicode-bidi: embed;}
-body * {unicode-bidi: embed;}
-.english {direction: ltr; display: inline-block; margin-left: 10px; padding: 3px 5px; font-family: 'Arial', sans-serif; background: var(--input-bg); border-radius: 3px; box-shadow: 0 1px 2px rgba(0,0,0,0.1); color: var(--text-light);}
-/* Style the dropdown-menu arrows */
-.gr-dropdown {position: relative;}
-.gr-dropdown select {direction: rtl; text-align: right; padding: 12px 40px 12px 15px; appearance: none; background: url('data:image/svg+xml;utf8,<svg fill="%23ffffff" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg"><path d="M7 10l5 5 5-5z"/></svg>') no-repeat right 15px center; background-size: 20px; border: 1px solid var(--input-border); border-radius: 5px; font-size: 16px; color: var(--text-light); background-color: var(--input-bg);}
-.gr-dropdown select option {direction: rtl; text-align: right; padding: 10px; background-color: var(--input-bg); color: var(--text-light);}
-
-/* Dark/Light Mode toggle */
-#theme-toggle {position: fixed; top: 10px; left: 10px; padding: 8px 15px; background-color: var(--button-bg); color: white; border-radius: 5px; cursor: pointer; z-index: 1000; transition: all 0.3s ease;}
-#theme-toggle:hover {background-color: var(--button-hover);}
-"""
-
-# JavaScript for switching the theme
-js = """
-function toggleTheme() {
-    const body = document.body;
-    const currentTheme = body.getAttribute('data-theme') || 'light';
-    const newTheme = currentTheme === 'light' ? 'dark' : 'light';
-    body.setAttribute('data-theme', newTheme);
-    const button = document.getElementById('theme-toggle');
-    button.textContent = newTheme === 'dark' ? 'تغییر به Light Mode' : 'تغییر به Dark Mode';
-    return newTheme;
-}
-
-document.getElementById('theme-toggle').addEventListener('click', function() {
-    toggleTheme();
-});
 """

+with gr.Blocks(css=css, title="Professional Thesis Analyzer with Gemini") as iface:
+    gr.Markdown("# Professional Thesis Analyzer with Gemini")
+    gr.Markdown("Upload your <span class='english'>PDF</span> file and use the analysis, summary, plagiarism check, or quality evaluation features.")
     with gr.Row():
         with gr.Column():
+            pdf_input = gr.File(file_types=['.pdf'], label="Upload <span class='english'>PDF</span> File", file_count="multiple")
             mode = gr.Radio(
+                ["Academic Analysis (RAG)", "Auto Summary", "Plagiarism Check", "Quality Evaluation", "Standard Response"],
+                label="Processing Mode",
+                value="Academic Analysis (RAG)"
             )
+            query = gr.Textbox(lines=3, placeholder="Enter your question or request here...", label="Question or Request")
+            section = gr.Dropdown(["Entire Document", "Introduction", "Methodology", "Results", "Discussion", "References"], label="Target Section", value="Entire Document")
+            language = gr.Dropdown(["English", "Farsi"], label="Response Language", value="English")
+            detail = gr.Dropdown(["Brief", "Detailed"], label="Detail Level", value="Detailed")
+            submit = gr.Button("Submit")
         with gr.Column():
+            output = gr.Textbox(label="Processing Result", lines=10, placeholder="Results will be displayed here...")

     submit.click(
         fn=academic_chatbot,
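The diff is truncated inside the submit.click(...) call, so its full argument list is not shown here. For orientation, a minimal Blocks sketch of the same wiring pattern (component names and handler are hypothetical, not the app's actual call): the fn receives the listed input components' values in order, and its return value fills the output component.

import gradio as gr

# Minimal sketch of Blocks event wiring, not the app's actual call.
def echo(message):
    return f"You said: {message}"

with gr.Blocks() as demo:
    box = gr.Textbox(label="Message")
    btn = gr.Button("Submit")
    out = gr.Textbox(label="Result")
    btn.click(fn=echo, inputs=[box], outputs=[out])

demo.launch()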