scopus / app.py
mnoorchenar's picture
Update 2026-01-30 19:35:53
2c0b974
"""
Flask Application for Reference Management Pipeline
Complete implementation matching overleaf.py functionality
Includes LaTeX citation parsing, BibTeX processing, and full pipeline
FEATURES:
- Fixed abbreviations to include periods (ISO 4 standard): "Energy" β†’ "Ener."
- LaTeX manuscript analysis for citation frequency and section tracking
- Filter references by section
- Clear entire database
"""
from flask import Flask, render_template, request, jsonify, send_file
import sqlite3
import pandas as pd
import os
import json
from datetime import datetime
import re
from difflib import SequenceMatcher
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import io
import time
import random
import hashlib
from werkzeug.utils import secure_filename
app = Flask(__name__)
app.config['DATABASE'] = 'refs_management.db'
app.config['API_KEY'] = os.environ.get('API_KEY', 'your-secret-key-here')
app.config['ENVIRONMENT'] = os.environ.get('ENVIRONMENT', 'development')
app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max
# Create upload folder
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
# Prepositions to keep lowercase in abbreviations
LOWERCASE_WORDS = {"and", "or", "in", "on", "of", "for", "to", "the", "a", "an", "with", "at", "by", "from"}
# =====================================================================
# HTTP CLIENT WITH RETRIES
# =====================================================================
def make_http_session():
"""Create HTTP session with retries and proper headers"""
s = requests.Session()
retry = Retry(
total=5,
backoff_factor=0.8,
status_forcelist=[429, 500, 502, 503, 504],
allowed_methods=["GET"]
)
s.mount("https://", HTTPAdapter(max_retries=retry))
s.headers.update({
"User-Agent": "RefsManagement/1.0 (mailto:contact@example.com)",
"Accept": "application/json",
})
return s
HTTP = make_http_session()
# =====================================================================
# AUTHENTICATION
# =====================================================================
def check_api_key():
"""Check API key for protected routes"""
if app.config['ENVIRONMENT'] == 'development':
return True
api_key = request.headers.get('X-API-Key')
return api_key == app.config['API_KEY']
# =====================================================================
# LATEX CITATION PARSING (from overleaf.py)
# =====================================================================
def parse_citations_from_tex(tex_content: str) -> pd.DataFrame:
"""Parse citations from LaTeX content with section tracking"""
print("πŸ“– Parsing citations from LaTeX")
lines = tex_content.split('\n')
clean_text = "\n".join(line for line in lines if not line.strip().startswith("%"))
section_pattern = re.compile(r'\\section\{([^}]*)\}(?:\\label\{[^}]*\})?')
cite_pattern = re.compile(r'\\cite\{([^}]*)\}')
sections = section_pattern.split(clean_text)
citations, ref_sections = [], {}
for i in range(1, len(sections), 2):
if i >= len(sections):
break
section_name = sections[i].strip()
section_text = sections[i+1] if i+1 < len(sections) else ""
matches = cite_pattern.findall(section_text)
for match in matches:
for key in match.split(","):
ref = key.strip()
citations.append(ref)
if ref not in ref_sections:
ref_sections[ref] = []
if section_name not in ref_sections[ref]:
ref_sections[ref].append(section_name)
freq, order = {}, []
for c in citations:
if c not in freq:
order.append(c)
freq[c] = freq.get(c, 0) + 1
df = pd.DataFrame({
"Reference": order,
"Frequency": [freq[c] for c in order],
"Sections": [", ".join(ref_sections[c]) for c in order]
})
print(f"βœ… Found {len(df)} unique citations")
return df
def merge_citations_with_bib(citations_df: pd.DataFrame, bib_df: pd.DataFrame) -> pd.DataFrame:
"""Merge citations with BibTeX entries"""
print("πŸ”— Merging citations with BibTeX")
bib_lookup = bib_df.set_index("Key").to_dict(orient="index")
merged_records = []
for _, row in citations_df.iterrows():
key = row["Reference"]
bib_info = bib_lookup.get(key, {})
merged_records.append({
"Reference": key,
"Frequency": row["Frequency"],
"Sections": row["Sections"],
"Type": bib_info.get("Type", ""),
"Authors": bib_info.get("Authors", ""),
"Title": bib_info.get("Title", ""),
"Journal/Booktitle": bib_info.get("Journal/Booktitle", ""),
"Year": bib_info.get("Year", ""),
"Publisher": bib_info.get("Publisher", ""),
"Volume": bib_info.get("Volume", ""),
"Pages": bib_info.get("Pages", ""),
"DOI": bib_info.get("DOI", ""),
"BibTeX": bib_info.get("BibTeX", "")
})
df = pd.DataFrame(merged_records)
print(f"βœ… Merged into {len(df)} rows")
return df
# =====================================================================
# UTILITY FUNCTIONS
# =====================================================================
def abbreviate_journal_custom(title: str) -> str:
"""
Custom abbreviation with periods (ISO 4 standard)
Examples:
"Energy" β†’ "Ener."
"Applied Energy" β†’ "Appl. Ener."
"Journal of Energy" β†’ "J. of Ener."
"IEEE Transactions on Neural Networks" β†’ "IEEE Trans. on Neural Netw."
"""
if not title:
return ""
words = title.split()
abbr = []
for i, word in enumerate(words):
# Keep prepositions and conjunctions in lowercase (except at start)
if word.lower() in LOWERCASE_WORDS and i != 0:
abbr.append(word.lower())
# Keep acronyms as-is (words with 2+ uppercase letters)
elif sum(1 for c in word if c.isupper()) >= 2:
abbr.append(word)
# Abbreviate long words (>4 letters) with period
elif len(word) > 4:
# Take first 4 letters and add period
abbr.append(word[:4].capitalize() + ".")
# Short words (≀4 letters): add period
else:
abbr.append(word.capitalize() + ".")
return " ".join(abbr)
def get_db_connection():
"""Get database connection"""
conn = sqlite3.connect(app.config['DATABASE'])
conn.row_factory = sqlite3.Row
return conn
def init_db():
"""Initialize database with proper schema"""
conn = get_db_connection()
cur = conn.cursor()
# Create table with all columns in proper order
cur.execute("""
CREATE TABLE IF NOT EXISTS bibliography (
id INTEGER PRIMARY KEY AUTOINCREMENT,
index_num INTEGER,
session_id TEXT,
reference TEXT,
frequency INTEGER,
sections TEXT,
key TEXT UNIQUE,
doi TEXT,
type TEXT,
authors TEXT,
title TEXT,
journal_booktitle TEXT,
year TEXT,
year_int INTEGER,
publisher TEXT,
volume TEXT,
pages TEXT,
bibtex TEXT,
crossref_bibtex TEXT,
crossref_bibtex_localkey TEXT,
title_similarity INTEGER,
journal_abbreviation TEXT,
crossref_bibtex_abbrev TEXT,
crossref_bibtex_protected TEXT,
used TEXT,
imported_date TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Create index on DOI
cur.execute("""
CREATE INDEX IF NOT EXISTS idx_bib_doi
ON bibliography(doi)
WHERE doi IS NOT NULL AND doi != ''
""")
conn.commit()
conn.close()
def extract_year_int(year_str):
"""Extract integer year from year string"""
if not year_str:
return None
match = re.search(r'\d{4}', str(year_str))
return int(match.group()) if match else None
def scan_brace_balanced_value(text, start_pos):
"""Scan for brace-balanced field value"""
if start_pos >= len(text):
return "", start_pos
if text[start_pos] == '{':
depth = 1
pos = start_pos + 1
while pos < len(text) and depth > 0:
if text[pos] == '{':
depth += 1
elif text[pos] == '}':
depth -= 1
pos += 1
return text[start_pos+1:pos-1], pos
elif text[start_pos] == '"':
pos = start_pos + 1
while pos < len(text):
if text[pos] == '"' and text[pos-1] != '\\':
return text[start_pos+1:pos], pos + 1
pos += 1
return text[start_pos+1:], len(text)
else:
pos = start_pos
while pos < len(text) and text[pos] not in ',}':
pos += 1
return text[start_pos:pos].strip(), pos
def parse_bibtex_entry(entry_text):
"""Parse single BibTeX entry with proper brace balancing"""
match = re.match(r'@(\w+)\s*\{([^,]+),', entry_text)
if not match:
return None
entry_type, entry_key = match.groups()
fields = {}
fields_start = entry_text.find(entry_key) + len(entry_key) + 1
fields_text = entry_text[fields_start:]
pos = 0
while pos < len(fields_text):
while pos < len(fields_text) and fields_text[pos] in ' \t\n\r,':
pos += 1
if pos >= len(fields_text) or fields_text[pos] == '}':
break
field_match = re.match(r'(\w+)\s*=\s*', fields_text[pos:])
if not field_match:
break
field_name = field_match.group(1).lower()
pos += field_match.end()
value, new_pos = scan_brace_balanced_value(fields_text, pos)
fields[field_name] = value.strip()
pos = new_pos
return {
'type': entry_type,
'key': entry_key.strip(),
'fields': fields
}
def parse_bibtex_input(bibtex_content):
"""Parse BibTeX content from user input"""
entries = ["@" + e for e in bibtex_content.split("@") if e.strip()]
papers = []
for entry in entries:
parsed = parse_bibtex_entry(entry)
if not parsed:
continue
fields = parsed['fields']
papers.append({
"Key": parsed['key'],
"Type": parsed['type'],
"Authors": fields.get("author", "").strip(),
"Title": fields.get("title", "").strip(),
"Journal/Booktitle": fields.get("journal", fields.get("booktitle", "")).strip(),
"Year": fields.get("year", "").strip(),
"Publisher": fields.get("publisher", fields.get("organization", "")).strip(),
"Volume": fields.get("volume", "").strip(),
"Pages": fields.get("pages", "").strip(),
"DOI": fields.get("doi", "").strip(),
"BibTeX": entry.strip(),
"Imported_Date": datetime.now().isoformat()
})
return pd.DataFrame(papers).drop_duplicates(subset="Key", keep="first").reset_index(drop=True)
def clean_bibtex_fields(bibtex):
"""Remove unwanted fields from BibTeX entries"""
if not bibtex:
return bibtex
fields_to_remove = ['url', 'source', 'publication_stage', 'note', 'abstract']
for field in fields_to_remove:
pattern = rf'\s*{field}\s*=\s*'
pos = 0
result = []
while pos < len(bibtex):
match = re.search(pattern, bibtex[pos:], re.IGNORECASE)
if not match:
result.append(bibtex[pos:])
break
result.append(bibtex[pos:pos + match.start()])
value_start = pos + match.end()
_, value_end = scan_brace_balanced_value(bibtex, value_start)
while value_end < len(bibtex) and bibtex[value_end] in ' \t\n\r,':
value_end += 1
pos = value_end
bibtex = ''.join(result)
bibtex = re.sub(r'\n\s*\n\s*\n', '\n\n', bibtex)
bibtex = re.sub(r',\s*,', ',', bibtex)
bibtex = re.sub(r',(\s*)\}', r'\1}', bibtex)
lines = [line for line in bibtex.split('\n') if line.strip()]
return '\n'.join(lines)
def protect_acronyms_in_fields(bibtex):
"""Protect acronyms with braces"""
if not bibtex:
return bibtex
def wrap_token(token):
if token.startswith("{") and token.endswith("}"):
return token
if sum(1 for c in token if c.isupper()) >= 2:
return "{" + token + "}"
return token
def process_field_value(value):
if value.startswith("{") and value.endswith("}"):
inner = value[1:-1]
if not ('{' in inner and '}' in inner):
return value
tokens = re.split(r'(\s+)', value)
fixed = "".join(wrap_token(tok) if tok.strip() else tok for tok in tokens)
fixed = re.sub(r'\{\{([^{}]+)\}\}', r'{\1}', fixed)
return fixed
for field in ["title", "booktitle", "journal"]:
pattern = rf'({field}\s*=\s*)'
matches = list(re.finditer(pattern, bibtex, re.IGNORECASE))
for match in reversed(matches):
field_start = match.end()
value, value_end = scan_brace_balanced_value(bibtex, field_start)
if value:
processed = process_field_value(value)
new_field = f"{match.group(1)}{{{processed}}}"
bibtex = bibtex[:match.start()] + new_field + bibtex[value_end:]
return bibtex
def replace_bibtex_key(bibtex, new_key):
"""Replace the citation key in a BibTeX entry"""
if not bibtex:
return bibtex
try:
start_brace = bibtex.index("{")
first_comma = bibtex.index(",", start_brace)
entry_type = bibtex[:start_brace]
new_start = f"{entry_type}{{{new_key},"
return new_start + bibtex[first_comma+1:]
except ValueError:
return bibtex
def enrich_with_crossref(df):
"""Enrich references with Crossref data"""
enriched_rows = []
for idx, row in df.iterrows():
enriched_data = dict(row)
if not row.get('Title'):
enriched_data['Crossref_BibTeX'] = row.get('BibTeX', '')
enriched_data['Title_Similarity'] = 0
enriched_rows.append(enriched_data)
continue
query_parts = [row['Title']]
if row.get('Authors'):
query_parts.append(row['Authors'].split(',')[0])
if row.get('Journal/Booktitle'):
query_parts.append(row['Journal/Booktitle'])
if row.get('Year'):
query_parts.append(row['Year'])
query = " ".join(query_parts)
try:
url = f"https://api.crossref.org/works?query.bibliographic={requests.utils.quote(query)}&rows=3"
response = HTTP.get(url, timeout=15)
response.raise_for_status()
items = response.json().get("message", {}).get("items", [])
best_score = 0
crossref_bibtex = row.get('BibTeX', '')
best_doi = row.get('DOI', '')
for item in items:
cr_title = item.get("title", [""])[0]
score = SequenceMatcher(None, row['Title'].lower(), cr_title.lower()).ratio()
if row.get('Year') and 'published-print' in item:
cr_year = str(item['published-print'].get('date-parts', [['']])[0][0])
if row['Year'].strip() == cr_year:
score = min(1.0, score + 0.1)
if score > best_score:
best_score = score
best_doi = item.get('DOI', best_doi)
if best_doi:
try:
bibtex_response = HTTP.get(
f"https://doi.org/{best_doi}",
headers={"Accept": "application/x-bibtex"},
timeout=15
)
if bibtex_response.status_code == 200:
crossref_bibtex = bibtex_response.text.strip()
except Exception as e:
print(f"⚠️ BibTeX fetch failed for DOI {best_doi}: {e}")
enriched_data['Crossref_BibTeX'] = crossref_bibtex if best_score >= 0.85 else row.get('BibTeX', '')
enriched_data['Title_Similarity'] = int(round(best_score * 100))
if best_doi:
enriched_data['DOI'] = best_doi
except Exception as e:
print(f"⚠️ Crossref enrichment failed: {e}")
enriched_data['Crossref_BibTeX'] = row.get('BibTeX', '')
enriched_data['Title_Similarity'] = 0
time.sleep(0.15 + random.uniform(0, 0.25))
enriched_rows.append(enriched_data)
return pd.DataFrame(enriched_rows)
def add_journal_abbreviations(df):
"""Add journal abbreviations and create all BibTeX versions"""
abbreviated_rows = []
for idx, row in df.iterrows():
journal = row.get('Journal/Booktitle', '')
journal_abbrev = abbreviate_journal_custom(journal)
row_data = dict(row)
row_data['Journal_Abbreviation'] = journal_abbrev
# Create LocalKey version
key_to_use = row_data.get('Key') or row_data.get('Reference') or f"ref_{idx}"
if row_data.get('Crossref_BibTeX'):
row_data['Crossref_BibTeX_LocalKey'] = replace_bibtex_key(
row_data['Crossref_BibTeX'],
key_to_use
)
else:
row_data['Crossref_BibTeX_LocalKey'] = row_data.get('BibTeX', '')
# Create abbreviated version
if journal_abbrev and row_data.get('Crossref_BibTeX_LocalKey'):
new_bib = row_data['Crossref_BibTeX_LocalKey'].strip()
new_bib = re.sub(
r'(journal\s*=\s*\{)[^}]+(\})',
rf'\1{journal_abbrev}\2',
new_bib,
flags=re.IGNORECASE
)
row_data['Crossref_BibTeX_Abbrev'] = new_bib
else:
row_data['Crossref_BibTeX_Abbrev'] = row_data.get('Crossref_BibTeX_LocalKey', row_data.get('BibTeX', ''))
# Create protected version
row_data['Crossref_BibTeX_Protected'] = protect_acronyms_in_fields(
row_data.get('Crossref_BibTeX_Abbrev', row_data.get('BibTeX', ''))
)
# Clean all versions
for bib_col in ['BibTeX', 'Crossref_BibTeX', 'Crossref_BibTeX_LocalKey',
'Crossref_BibTeX_Abbrev', 'Crossref_BibTeX_Protected']:
if row_data.get(bib_col):
row_data[bib_col] = clean_bibtex_fields(row_data[bib_col])
abbreviated_rows.append(row_data)
return pd.DataFrame(abbreviated_rows)
# =====================================================================
# ROUTES
# =====================================================================
@app.route('/')
def index():
"""Main page"""
return render_template('index.html')
@app.route('/api/process', methods=['POST'])
def process_bibtex():
"""Process BibTeX content with optional LaTeX analysis"""
if not check_api_key():
return jsonify({'error': 'Unauthorized'}), 401
try:
# Handle both JSON and FormData
if request.is_json:
data = request.get_json()
bibtex_content = data.get('bibtex_content') or data.get('bibtex', '')
input_mode = data.get('input_mode', 'bibtex')
enrich = data.get('enrich', False)
abbreviate = data.get('abbreviate', False)
protect = data.get('protect', False)
save_to_db = data.get('save_to_db', False)
latex_file = None
else:
# FormData from file upload
bibtex_content = request.form.get('bibtex_content', '')
input_mode = request.form.get('input_mode', 'bibtex')
enrich = request.form.get('enrich', 'false').lower() == 'true'
abbreviate = request.form.get('abbreviate', 'false').lower() == 'true'
protect = request.form.get('protect', 'false').lower() == 'true'
save_to_db = request.form.get('save_to_db', 'false').lower() == 'true'
latex_file = request.files.get('latex_file')
print(f"πŸ“₯ Received: {len(bibtex_content)} chars, mode={input_mode}")
# TITLE MODE: Search Crossref for each title
if input_mode == 'title':
titles = [line.strip() for line in bibtex_content.split('\n') if line.strip()]
print(f"πŸ” Title mode: searching for {len(titles)} titles")
if not titles:
return jsonify({'error': 'No titles provided'}), 400
# Convert titles to BibTeX entries via Crossref
bibtex_entries = []
for i, title in enumerate(titles, 1):
print(f" [{i}/{len(titles)}] Searching: {title[:50]}...")
try:
url = f"https://api.crossref.org/works?query.bibliographic={requests.utils.quote(title)}&rows=1"
response = HTTP.get(url, timeout=15)
items = response.json().get("message", {}).get("items", [])
if items and items[0].get('DOI'):
doi = items[0]['DOI']
bibtex_r = HTTP.get(
f"https://doi.org/{doi}",
headers={"Accept": "application/x-bibtex"},
timeout=15
)
if bibtex_r.status_code == 200:
bibtex_entries.append(bibtex_r.text.strip())
print(f" βœ… Found via DOI: {doi}")
time.sleep(random.uniform(1, 2))
except Exception as e:
print(f" ⚠️ Failed: {e}")
bibtex_content = '\n\n'.join(bibtex_entries)
print(f"βœ… Retrieved {len(bibtex_entries)} BibTeX entries from titles")
if not bibtex_content:
return jsonify({'error': 'No BibTeX entries found for the provided titles'}), 400
if not bibtex_content or len(bibtex_content.strip()) == 0:
return jsonify({'error': 'No BibTeX content provided'}), 400
# Parse BibTeX
df = parse_bibtex_input(bibtex_content)
if df.empty:
return jsonify({'error': 'No valid BibTeX entries found'}), 400
# Parse LaTeX file if provided
latex_analyzed = False
citations_found = 0
if latex_file:
print("πŸ“„ LaTeX file provided, analyzing...")
try:
latex_content = latex_file.read().decode('utf-8')
citations_df = parse_citations_from_tex(latex_content)
citations_found = len(citations_df)
# Merge LaTeX citation data with BibTeX data
df = merge_citations_with_bib(citations_df, df)
df.insert(0, "Index", range(1, len(df) + 1))
latex_analyzed = True
print(f"βœ… LaTeX analyzed: {citations_found} citations found")
except Exception as e:
print(f"⚠️ LaTeX analysis failed: {e}")
# Enrich if requested
if enrich:
df = enrich_with_crossref(df)
else:
df['Crossref_BibTeX'] = df['BibTeX']
df['Title_Similarity'] = 0
# Always create all versions
df = add_journal_abbreviations(df)
# Save to database if requested
db_id = None
if save_to_db:
conn = get_db_connection()
cursor = conn.cursor()
session_id = datetime.now().isoformat()
for _, row in df.iterrows():
doi = row.get('DOI', '').strip()
year_int = extract_year_int(row.get('Year', ''))
key_val = row.get('Reference') or row.get('Key', f"ref_{row.name}")
try:
cursor.execute('''
INSERT OR REPLACE INTO bibliography
(index_num, session_id, reference, frequency, sections, key, doi, type,
authors, title, journal_booktitle, year, year_int, publisher, volume, pages,
bibtex, crossref_bibtex, crossref_bibtex_localkey, title_similarity,
journal_abbreviation, crossref_bibtex_abbrev, crossref_bibtex_protected,
imported_date)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
row.get('Index'), session_id, row.get('Reference', key_val),
row.get('Frequency', 0), row.get('Sections', ''),
key_val, doi, row.get('Type', ''), row.get('Authors', ''),
row.get('Title', ''), row.get('Journal/Booktitle', ''),
row.get('Year', ''), year_int, row.get('Publisher', ''),
row.get('Volume', ''), row.get('Pages', ''),
row.get('BibTeX', ''), row.get('Crossref_BibTeX', ''),
row.get('Crossref_BibTeX_LocalKey', ''),
row.get('Title_Similarity', 0), row.get('Journal_Abbreviation', ''),
row.get('Crossref_BibTeX_Abbrev', ''),
row.get('Crossref_BibTeX_Protected', ''),
datetime.now().isoformat()
))
except sqlite3.IntegrityError as e:
print(f"⚠️ DB insert failed for {key_val}: {e}")
conn.commit()
db_id = session_id
conn.close()
# Determine final BibTeX column
if protect:
final_bibtex_col = 'Crossref_BibTeX_Protected'
elif abbreviate:
final_bibtex_col = 'Crossref_BibTeX_Abbrev'
else:
final_bibtex_col = 'Crossref_BibTeX_LocalKey'
# Prepare response columns
response_cols = ['Key', 'Type', 'Authors', 'Title', 'Journal/Booktitle', 'Year', final_bibtex_col]
if latex_analyzed:
response_cols.insert(6, 'Frequency')
response_cols.insert(7, 'Sections')
response_df = df[[col for col in response_cols if col in df.columns]].copy()
response_df.columns = list(response_df.columns[:-1]) + ['Final_BibTeX']
return jsonify({
'success': True,
'count': len(df),
'db_id': db_id,
'latex_analyzed': latex_analyzed,
'citations_found': citations_found,
'data': response_df.to_dict(orient='records'),
'full_data': df.to_dict(orient='records')
})
except Exception as e:
import traceback
traceback.print_exc()
return jsonify({'error': str(e)}), 500
@app.route('/api/sections/list', methods=['GET'])
def list_sections():
"""Get list of all unique sections from database"""
try:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute('SELECT DISTINCT sections FROM bibliography WHERE sections IS NOT NULL AND sections != ""')
rows = cursor.fetchall()
conn.close()
# Parse and deduplicate sections
all_sections = set()
for row in rows:
if row[0]:
sections = [s.strip() for s in row[0].split(',')]
all_sections.update(sections)
sections_list = sorted(list(all_sections))
return jsonify({
'success': True,
'sections': sections_list
})
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/api/sections/references', methods=['GET'])
def get_references_by_section():
"""Get all references for a specific section"""
section = request.args.get('section', '')
if not section:
return jsonify({'error': 'Section parameter required'}), 400
try:
conn = get_db_connection()
cursor = conn.cursor()
# Find all references that contain this section
cursor.execute('''
SELECT key, title, authors, year, frequency, sections, reference
FROM bibliography
WHERE sections LIKE ?
''', (f'%{section}%',))
rows = cursor.fetchall()
conn.close()
references = []
for row in rows:
# Calculate frequency in this specific section
sections_list = [s.strip() for s in row[5].split(',') if s.strip()]
if section in sections_list:
# Count occurrences in this section (simplified - assumes equal distribution)
total_freq = row[4] or 0
num_sections = len(sections_list)
freq_in_section = total_freq // num_sections if num_sections > 0 else total_freq
references.append({
'key': row[0],
'title': row[1],
'authors': row[2],
'year': row[3],
'frequency_in_section': freq_in_section,
'total_frequency': total_freq,
'all_sections': row[5]
})
return jsonify({
'success': True,
'section': section,
'references': references
})
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/api/database/entries', methods=['GET'])
def get_database_entries():
"""Get all entries from database"""
try:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute('''
SELECT * FROM bibliography ORDER BY created_at DESC LIMIT 100
''')
columns = [description[0] for description in cursor.description]
entries = [dict(zip(columns, row)) for row in cursor.fetchall()]
conn.close()
return jsonify({
'success': True,
'count': len(entries),
'entries': entries
})
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/api/database/delete/<key>', methods=['DELETE'])
def delete_entry(key):
"""Delete entry from database"""
if not check_api_key():
return jsonify({'error': 'Unauthorized'}), 401
try:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute('DELETE FROM bibliography WHERE key=?', (key,))
conn.commit()
conn.close()
return jsonify({'success': True})
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/api/database/clear', methods=['POST'])
def clear_database():
"""Clear all entries from database"""
if not check_api_key():
return jsonify({'error': 'Unauthorized'}), 401
try:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute('DELETE FROM bibliography')
deleted_count = cursor.rowcount
cursor.execute('DELETE FROM sqlite_sequence WHERE name="bibliography"')
conn.commit()
conn.close()
print(f"πŸ—‘οΈ Cleared {deleted_count} entries from database")
return jsonify({
'success': True,
'deleted_count': deleted_count,
'message': f'Successfully cleared {deleted_count} entries from database'
})
except Exception as e:
import traceback
traceback.print_exc()
return jsonify({'error': str(e)}), 500
@app.route('/api/database/export', methods=['GET'])
def export_database():
"""Export database as CSV"""
try:
conn = get_db_connection()
df = pd.read_sql_query('SELECT * FROM bibliography', conn)
conn.close()
output = io.StringIO()
df.to_csv(output, index=False)
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode()),
mimetype='text/csv',
as_attachment=True,
download_name='references_export.csv'
)
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/api/database/export-bibtex', methods=['GET'])
def export_bibtex():
"""Export database as BibTeX"""
try:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute('SELECT key, crossref_bibtex_protected FROM bibliography ORDER BY key')
bibtex_content = '\n\n'.join([row[1] for row in cursor.fetchall() if row[1]])
conn.close()
return send_file(
io.BytesIO(bibtex_content.encode()),
mimetype='text/plain',
as_attachment=True,
download_name='references.bib'
)
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/api/database/download', methods=['GET'])
def download_database():
"""Download entire database file"""
try:
return send_file(
app.config['DATABASE'],
mimetype='application/x-sqlite3',
as_attachment=True,
download_name='refs_management.db'
)
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/api/stats', methods=['GET'])
def get_stats():
"""Get database statistics"""
try:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute('SELECT COUNT(*) FROM bibliography')
total = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(DISTINCT type) FROM bibliography')
types = cursor.fetchone()[0]
cursor.execute('SELECT COUNT(DISTINCT year_int) FROM bibliography WHERE year_int IS NOT NULL')
years = cursor.fetchone()[0]
conn.close()
return jsonify({
'total_entries': total,
'entry_types': types,
'unique_years': years
})
except Exception as e:
return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
init_db()
app.run(debug=False, host='0.0.0.0', port=7860)