Spaces:

Krish-05
/

text-extraction-api

Sleeping

App Files Files Community

krishnachoudhary-hclguvi committed on Apr 3

Commit

a2aa7c3

unverified ·

1 Parent(s): 483f7ec

Sync GitHub commit b749f19 updates

Browse files

Files changed (18) hide show

.gitignore +40 -0
analyzers/summarizer.py +64 -9
analyzers/text_cleaner.py +46 -0
config.py +12 -3
extractors/ocr_extractor.py +80 -79
extractors/url_extractor.py +3 -1
main.py +12 -0
models/schemas.py +1 -0
requirements.txt +2 -0
static/app.js +34 -2
static/index.html +5 -0
static/styles.css +102 -6
test_gemini.py +22 -0
test_gemini_vision.py +18 -0
test_models.py +18 -0
test_ocr.py +9 -0
test_raw.py +16 -0
test_raw2.py +25 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,40 @@

+# Build and virtual environments
+.venv/
+venv/
+ENV/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+env/
+pip-log.txt
+pip-delete-this-directory.txt
+# Local configuration and databases
+.env
+docintel.db
+docintel.db-journal
+uploads/*
+!uploads/.gitkeep
+# Sensitive or large test files
+test_results.txt
+test_output.txt
+test_api_results.json
+*.log
+# IDE and System files
+.idea/
+.vscode/
+.vscode-test/
+.DS_Store
+Thumbs.db
+# Tool-specific
+.gemini/
+scripts/
+brain/
+implementation_plan.md
+task.md
+walkthrough.md

analyzers/summarizer.py CHANGED Viewed

@@ -11,10 +11,66 @@ from sumy.nlp.stemmers import Stemmer
 from sumy.utils import get_stop_words
 from models.schemas import SummaryResult
 from config import SUMMARY_SENTENCE_COUNT, SUMMARY_ALGORITHM
 LANGUAGE = "english"
 def _get_summarizer(algorithm: str):
     """Get the appropriate summarizer based on algorithm name."""
     stemmer = Stemmer(LANGUAGE)
@@ -32,21 +88,20 @@ def _get_summarizer(algorithm: str):
 def summarize_text(text: str, sentence_count: int = None, algorithm: str = None) -> SummaryResult:
     """
-    Generate an extractive summary of the given text.
-    Args:
-        text: The input text to summarize.
-        sentence_count: Number of sentences in the summary (default from config).
-        algorithm: Summarization algorithm to use (default from config).
-    Returns:
-        SummaryResult with the summary and statistics.
     """
     if sentence_count is None:
         sentence_count = SUMMARY_SENTENCE_COUNT
     if algorithm is None:
         algorithm = SUMMARY_ALGORITHM
     # Handle short texts
     sentences_in_text = [s.strip() for s in text.replace("\n", " ").split(".") if s.strip()]
     if len(sentences_in_text) <= sentence_count:

 from sumy.utils import get_stop_words
 from models.schemas import SummaryResult
 from config import SUMMARY_SENTENCE_COUNT, SUMMARY_ALGORITHM
+import config
+import time
+try:
+    import google.generativeai as genai
+    GEMINI_AVAILABLE = True
+except ImportError:
+    GEMINI_AVAILABLE = False
 LANGUAGE = "english"
+def summarize_with_gemini(text: str) -> SummaryResult:
+    """Generate high-quality summary and key highlights using Gemini AI."""
+    if not config.is_gemini_available():
+        return None
+    start_time = time.time()
+    try:
+        genai.configure(api_key=config.GEMINI_API_KEY)
+        model = genai.GenerativeModel(config.GEMINI_MODEL_NAME)
+        prompt = (
+            "You are an expert document analyst. Read the following text and create a highly synthesized, unique abstractive summary.\n"
+            "CRITICAL INSTRUCTIONS:\n"
+            "1. Do NOT just copy/paste or extract sentences verbatim from the text. Synthesize the meaning into your own words.\n"
+            "2. Provide a unique, high-level overview of the entire document's core message or purpose.\n"
+            "3. Structure the summary with thematic topics (e.g., **Key Themes**, **Major Findings**, **Core Assertions**, or document-specific domains like **Experience** for resumes).\n"
+            "4. For each topic, provide concise insights, not just a list of extracted facts.\n"
+            "5. Synthesize 3 to 7 truly unique, critical 'key points' that represent the ultimate takeaways of the document for the key_points array.\n"
+            "Respond strictly in JSON format:\n"
+            '{"summary": "**Topic 1**\\n- Insightful summary point 1...\\n\\n**Topic 2**\\n- Insightful summary point 2...", "key_points": ["**CORE TAKEAWAY**: synthesized point", ...]}'
+        )
+        response = model.generate_content(f"{prompt}\n\nText: {text}", generation_config={"response_mime_type": "application/json"})
+        import json
+        data = json.loads(response.text)
+        summary = data.get("summary", "")
+        key_points = data.get("key_points", [])
+        if summary:
+            elapsed = (time.time() - start_time) * 1000
+            compression_ratio = len(summary) / len(text) if len(text) > 0 else 1.0
+            return SummaryResult(
+                summary=summary,
+                key_points=key_points,
+                original_length=len(text),
+                summary_length=len(summary),
+                compression_ratio=round(compression_ratio, 4),
+                sentence_count=len(key_points), # Using key_points count as surrogate
+                algorithm="Gemini AI (Abstractive)"
+            )
+    except Exception as e:
+        print(f"Gemini summarization failed: {e}")
+    return None
 def _get_summarizer(algorithm: str):
     """Get the appropriate summarizer based on algorithm name."""
     stemmer = Stemmer(LANGUAGE)
 def summarize_text(text: str, sentence_count: int = None, algorithm: str = None) -> SummaryResult:
     """
+    Generate an extractive or abstractive summary of the given text.
+    Prioritizes Gemini if available.
     """
     if sentence_count is None:
         sentence_count = SUMMARY_SENTENCE_COUNT
     if algorithm is None:
         algorithm = SUMMARY_ALGORITHM
+    # 0. Try Gemini (Superior abstractive quality)
+    if GEMINI_AVAILABLE and config.is_gemini_available():
+        gemini_result = summarize_with_gemini(text)
+        if gemini_result:
+            return gemini_result
     # Handle short texts
     sentences_in_text = [s.strip() for s in text.replace("\n", " ").split(".") if s.strip()]
     if len(sentences_in_text) <= sentence_count:

analyzers/text_cleaner.py ADDED Viewed

	@@ -0,0 +1,46 @@

+"""
+Intelligent text cleaner using Gemini to format raw OCR and PDF extractions perfectly.
+"""
+import time
+import config
+try:
+    import google.generativeai as genai
+    GEMINI_AVAILABLE = True
+except ImportError:
+    GEMINI_AVAILABLE = False
+def clean_format_text(raw_text: str) -> str:
+    """Pass raw extracted text through Gemini to clean formatting and add markdown structure without missing words."""
+    if not config.is_gemini_available() or not GEMINI_AVAILABLE:
+        return raw_text
+    # Skip if text is extremely short
+    if len(raw_text.strip()) < 50:
+        return raw_text
+    try:
+        genai.configure(api_key=config.GEMINI_API_KEY)
+        model = genai.GenerativeModel(config.GEMINI_MODEL_NAME)
+        prompt = (
+            "You are a master document formatting assistant. Your task is to clean up and perfectly format the raw extracted text below into a structured and topic-wise format.\n\n"
+            "CRITICAL INSTRUCTIONS:\n"
+            "1. You MUST preserve EVERY SINGLE WORD and detail from the original text. Do not summarize, skip, or rephrase anything. No information loss is acceptable.\n"
+            "2. Organize all content logically into structured, thematic topics (topic-wise). Apply bold markdown headers (e.g. **Contact Information**, **Experience**, **Summary**, or other relevant topics) and use proper bullet points.\n"
+            "3. Fix arbitrary broken line-breaks (typical OCR artifacts) and stitch sentences back together naturally.\n"
+            "4. Return ONLY the perfectly formatted text. Do not include any JSON wrapping or conversational preamble.\n\n"
+            "RAW TEXT:\n"
+        )
+        # We don't use JSON response here, we just want plain formatted text
+        response = model.generate_content(prompt + raw_text)
+        if response.text and len(response.text.strip()) > 0:
+            return response.text.strip()
+    except Exception as e:
+        print(f"Intelligent formatting failed, falling back to raw: {e}")
+    return raw_text

config.py CHANGED Viewed

@@ -1,8 +1,9 @@
-"""
-Configuration settings for the Document Processing System.
-"""
 import os
 import shutil
 # --- Paths ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -75,3 +76,11 @@ SENTIMENT_THRESHOLDS = {
     "positive": 0.05,
     "negative": -0.05,
 }

 import os
 import shutil
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
 # --- Paths ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
     "positive": 0.05,
     "negative": -0.05,
 }
+# --- Gemini AI Configuration ---
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
+# Flag to check if Gemini is configured
+def is_gemini_available():
+    return bool(GEMINI_API_KEY)

extractors/ocr_extractor.py CHANGED Viewed

@@ -9,6 +9,12 @@ from PIL import Image, ImageEnhance, ImageFilter, ImageOps
 from models.schemas import ExtractionResult, DocumentMetadata
 import config
 # --- OCR Engine Detection ---
 try:
@@ -81,40 +87,6 @@ def _preprocess_image(image: Image.Image) -> Image.Image:
     return image
-def _preprocess_color_text(image: Image.Image) -> Image.Image:
-    """Preprocess image to preserve colored headline text (e.g., certificates)."""
-    rgb = image.convert("RGB")
-    rgb = ImageEnhance.Color(rgb).enhance(2.2)
-    rgb = ImageEnhance.Contrast(rgb).enhance(1.25)
-    rgb = rgb.filter(ImageFilter.SHARPEN)
-    return rgb
-def _filter_easyocr_results(results: list, min_conf: float = 0.25) -> list:
-    """Drop very low-confidence and non-informative EasyOCR boxes."""
-    filtered = []
-    for item in results or []:
-        if len(item) < 3:
-            continue
-        text = str(item[1]).strip()
-        conf = float(item[2])
-        if conf < min_conf:
-            continue
-        if not any(ch.isalnum() for ch in text):
-            continue
-        filtered.append(item)
-    return filtered
-def _score_extracted_text(text: str) -> int:
-    """Heuristic score to choose best OCR pass output."""
-    if not text:
-        return 0
-    alpha_num = sum(1 for c in text if c.isalnum())
-    penalties = sum(1 for c in text if c in "{}[]|~`")
-    return alpha_num - (penalties * 3)
 def _reconstruct_from_boxes(results: list) -> str:
     """ Reconstruct text layout from bounding boxes.
         Sort by top, then group by 'lines' based on y-coordinate.
@@ -167,59 +139,88 @@ def _reconstruct_from_boxes(results: list) -> str:
     return "\n".join(final_text)
-def extract_image(file_path: str) -> ExtractionResult:
-    """Extract text from an image using the best available OCR engine."""
     start_time = time.time()
-    original_size = (0, 0)
     try:
-        with Image.open(file_path) as img:
-            original_size = img.size
-    except Exception:
-        # Keep defaults; OCR engines will surface the real file/open errors.
-        pass
-    # 1. Check for EasyOCR (Preferred)
     if EASYOCR_AVAILABLE:
         try:
             reader = get_easyocr_reader()
             if reader:
-                with Image.open(file_path) as src_img:
-                    base_img = src_img.convert("RGB")
-                # Pass 1: standard detection with lower thresholds for certificate layouts.
-                results_default = reader.readtext(
-                    np.array(base_img),
-                    detail=1,
-                    paragraph=False,
-                    canvas_size=1200,
-                    contrast_ths=0.1,
-                    mag_ratio=1.2,
-                    text_threshold=0.6,
-                    low_text=0.25,
-                    link_threshold=0.25,
-                )
-                # Pass 2: boosted color/contrast to recover orange/blue headings.
-                color_img = _preprocess_color_text(base_img)
-                results_color = reader.readtext(
-                    np.array(color_img),
-                    detail=1,
-                    paragraph=False,
-                    canvas_size=1200,
-                    contrast_ths=0.05,
-                    mag_ratio=1.2,
-                    text_threshold=0.55,
-                    low_text=0.2,
-                    link_threshold=0.2,
                 )
-                filtered_default = _filter_easyocr_results(results_default)
-                filtered_color = _filter_easyocr_results(results_color)
-                text_default = _reconstruct_from_boxes(filtered_default)
-                text_color = _reconstruct_from_boxes(filtered_color)
-                text = text_default if _score_extracted_text(text_default) >= _score_extracted_text(text_color) else text_color
                 if text.strip():
                     elapsed = (time.time() - start_time) * 1000

 from models.schemas import ExtractionResult, DocumentMetadata
 import config
+try:
+    import google.generativeai as genai
+    GEMINI_AVAILABLE = True
+except ImportError:
+    GEMINI_AVAILABLE = False
 # --- OCR Engine Detection ---
 try:
     return image
 def _reconstruct_from_boxes(results: list) -> str:
     """ Reconstruct text layout from bounding boxes.
         Sort by top, then group by 'lines' based on y-coordinate.
     return "\n".join(final_text)
+def extract_image_gemini(file_path: str) -> ExtractionResult:
+    """Extract text from an image using Gemini 1.5 Flash for perfect layout alignment."""
+    if not config.GEMINI_API_KEY:
+        return ExtractionResult(success=False, error_message="Gemini API Key missing", raw_text="", metadata=DocumentMetadata())
     start_time = time.time()
     try:
+        genai.configure(api_key=config.GEMINI_API_KEY)
+        model = genai.GenerativeModel(config.GEMINI_MODEL_NAME)
+        image = Image.open(file_path)
+        # Prompt for perfect extraction with layout preservation
+        prompt = (
+            "Perform OCR on this image. Extract EVERY bit of text correctly. "
+            "Maintain the original layout, columns, and spacing exactly as they appear. "
+            "Do not add any explanations, markdown, or commentary. Output only the extracted text."
+        )
+        response = model.generate_content([prompt, image])
+        text = response.text.strip()
+        if text:
+            elapsed = (time.time() - start_time) * 1000
+            metadata = DocumentMetadata(
+                title=os.path.basename(file_path),
+                page_count=1,
+                word_count=len(text.split()),
+                character_count=len(text),
+                file_type="Image (Gemini AI)",
+                extra={
+                    "image_width": image.width,
+                    "image_height": image.height,
+                    "ocr_engine": "Gemini 1.5 Flash",
+                    "accuracy": "Perfect (Vision-Language Model)"
+                }
+            )
+            return ExtractionResult(
+                raw_text=text,
+                metadata=metadata,
+                success=True,
+                extraction_time_ms=elapsed
+            )
+    except Exception as e:
+        print(f"Gemini OCR failed: {e}")
+    return ExtractionResult(success=False, error_message="Gemini failed", raw_text="", metadata=DocumentMetadata())
+def extract_image(file_path: str) -> ExtractionResult:
+    """Extract text from an image using the best available OCR engine (Gemini -> EasyOCR -> Tesseract)."""
+    start_time = time.time()
+    # 0. Check for Gemini (Best quality, layout aware)
+    if GEMINI_AVAILABLE and config.is_gemini_available():
+        result = extract_image_gemini(file_path)
+        if result.success:
+            return result
+    # 1. Check for EasyOCR (Preferred local)
     if EASYOCR_AVAILABLE:
         try:
             reader = get_easyocr_reader()
             if reader:
+                # Get original dimensions for metadata
+                with Image.open(file_path) as img:
+                    original_size = img.size
+                # The original file path is passed straight to EasyOCR below;
+                # no preprocessing is applied before recognition
+                # Perform OCR with layout awareness
+                # Adjusting thresholds for better numeric and tabular capture
+                results = reader.readtext(
+                    file_path,
+                    detail=1,
+                    paragraph=False, # We want individual boxes for layout reconstruction
+                    canvas_size=1200, # Shrunk to detect huge fonts (like certificate names) that CRAFT misses
+                    contrast_ths=0.1  # Reset to 0.1 so colored/light text isn't dropped
                 )
+                # Reconstruct full layout from bounding boxes
+                text = _reconstruct_from_boxes(results)
                 if text.strip():
                     elapsed = (time.time() - start_time) * 1000

extractors/url_extractor.py CHANGED Viewed

@@ -17,7 +17,9 @@ def extract_url(url: str) -> ExtractionResult:
         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
-        response = requests.get(url, headers=headers, timeout=10)
         response.raise_for_status()
         # 2. Parse HTML

         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
+        import urllib3
+        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+        response = requests.get(url, headers=headers, timeout=10, verify=False)
         response.raise_for_status()
         # 2. Parse HTML

main.py CHANGED Viewed

@@ -24,6 +24,7 @@ from extractors.url_extractor import extract_url
 from analyzers.summarizer import summarize_text
 from analyzers.ner_extractor import extract_entities
 from analyzers.sentiment import analyze_sentiment
 # --- App Setup ---
 app = FastAPI(
@@ -96,7 +97,18 @@ def _process_document(file_path: str, file_type: str, task_id: str):
             task.error_message = extraction.error_message or "No text could be extracted."
             task.processing_time_ms = (time.time() - start_time) * 1000
             return
         raw_text = extraction.raw_text
         # Step 2: Summarization

 from analyzers.summarizer import summarize_text
 from analyzers.ner_extractor import extract_entities
 from analyzers.sentiment import analyze_sentiment
+from analyzers.text_cleaner import clean_format_text
 # --- App Setup ---
 app = FastAPI(
             task.error_message = extraction.error_message or "No text could be extracted."
             task.processing_time_ms = (time.time() - start_time) * 1000
             return
+        raw_text = extraction.raw_text
+        # Intelligent Formatting Pass via Gemini
+        formatted_text = clean_format_text(raw_text)
+        if formatted_text == raw_text:
+            # Fallback cleanup for broken line breaks if Gemini was unavailable
+            import re
+            formatted_text = re.sub(r'(?<!\n)\n(?!\n)', ' ', formatted_text)
+            formatted_text = re.sub(r'[ \t]+', ' ', formatted_text)
+        extraction.raw_text = formatted_text.strip()
         raw_text = extraction.raw_text
         # Step 2: Summarization

models/schemas.py CHANGED Viewed

@@ -52,6 +52,7 @@ class ExtractionResult(BaseModel):
 class SummaryResult(BaseModel):
     summary: str
     original_length: int
     summary_length: int
     compression_ratio: float

 class SummaryResult(BaseModel):
     summary: str
+    key_points: List[str] = []
     original_length: int
     summary_length: int
     compression_ratio: float

requirements.txt CHANGED Viewed

@@ -14,3 +14,5 @@ nltk==3.8.1
 aiofiles==24.1.0
 requests==2.32.3
 beautifulsoup4==4.12.3

 aiofiles==24.1.0
 requests==2.32.3
 beautifulsoup4==4.12.3
+google-generativeai
+python-dotenv

static/app.js CHANGED Viewed

@@ -261,26 +261,56 @@ function displayResults(data) {
     const timeSeconds = (data.processing_time_ms / 1000).toFixed(1);
     $('#processingTime').textContent = `⏱ ${timeSeconds}s`;
     // Extracted Text
     const textEl = $('#extractedText');
     if (data.extraction?.raw_text) {
-        textEl.textContent = data.extraction.raw_text;
     } else {
         textEl.innerHTML = `<p class="placeholder">${data.extraction?.error_message || 'No text extracted.'}</p>`;
     }
     // Summary
     if (data.summary) {
-        $('#summaryContent').textContent = data.summary.summary;
         $('#summaryStats').classList.remove('hidden');
         $('#statOriginalLen').textContent = data.summary.original_length.toLocaleString();
         $('#statSummaryLen').textContent = data.summary.summary_length.toLocaleString();
         const pct = Math.round((1 - data.summary.compression_ratio) * 100);
         $('#statCompression').textContent = `${pct}%`;
         $('#statAlgorithm').textContent = data.summary.algorithm;
     } else {
         $('#summaryContent').innerHTML = '<p class="placeholder">Summarization not available.</p>';
         $('#summaryStats').classList.add('hidden');
     }
     // Entities
@@ -560,6 +590,8 @@ function resetAll() {
     $('#extractedText').innerHTML = '<p class="placeholder">No text extracted yet.</p>';
     $('#summaryContent').innerHTML = '<p class="placeholder">No summary available.</p>';
     $('#summaryStats').classList.add('hidden');
     $('#entityCategories').innerHTML = '<p class="placeholder">No entities detected.</p>';
     $('#entityList').innerHTML = '';
     $('#sentimentOverview').innerHTML = '<p class="placeholder">No sentiment data available.</p>';

     const timeSeconds = (data.processing_time_ms / 1000).toFixed(1);
     $('#processingTime').textContent = `⏱ ${timeSeconds}s`;
+    // Convert markdown text into an HTML string.
+    // Uses the global `marked` library when it is present (either the
+    // `marked.parse` API or the older callable form) and otherwise falls back
+    // to a minimal conversion, e.g. when the CDN script failed to load or is blocked.
+    // NOTE(review): only the fallback branch calls escapeHtml; the marked
+    // branches return HTML generated from the input without escaping or
+    // sanitizing it here. Callers assign the result to innerHTML — confirm
+    // whether the output must be sanitized first.
+    const parseMarkdown = (text) => {
+        // Empty / missing input renders as nothing.
+        if (!text) return '';
+        if (window.marked && window.marked.parse) {
+            return window.marked.parse(text);
+        } else if (window.marked) {
+            return window.marked(text);
+        }
+        // Very basic fallback if marked fails to load: escape the text, turn
+        // newlines into <br>, and render **bold** spans as <strong>.
+        return escapeHtml(text).replace(/\n/g, '<br>').replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
+    };
     // Extracted Text
     const textEl = $('#extractedText');
     if (data.extraction?.raw_text) {
+        textEl.innerHTML = parseMarkdown(data.extraction.raw_text);
     } else {
         textEl.innerHTML = `<p class="placeholder">${data.extraction?.error_message || 'No text extracted.'}</p>`;
     }
     // Summary
     if (data.summary) {
+        $('#summaryContent').innerHTML = parseMarkdown(data.summary.summary || 'Summary generation failed.');
         $('#summaryStats').classList.remove('hidden');
         $('#statOriginalLen').textContent = data.summary.original_length.toLocaleString();
         $('#statSummaryLen').textContent = data.summary.summary_length.toLocaleString();
         const pct = Math.round((1 - data.summary.compression_ratio) * 100);
         $('#statCompression').textContent = `${pct}%`;
         $('#statAlgorithm').textContent = data.summary.algorithm;
+        // Render Key Highlights
+        const highlightsContainer = $('#keyHighlightsContainer');
+        const highlightsList = $('#highlightsList');
+        if (data.summary.key_points && data.summary.key_points.length > 0) {
+            highlightsContainer.classList.remove('hidden');
+            highlightsList.innerHTML = data.summary.key_points
+                .map(point => {
+                    let escaped = escapeHtml(point);
+                    let bolded = escaped.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
+                    return `<li>${bolded}</li>`;
+                })
+                .join('');
+        } else {
+            highlightsContainer.classList.add('hidden');
+        }
     } else {
         $('#summaryContent').innerHTML = '<p class="placeholder">Summarization not available.</p>';
         $('#summaryStats').classList.add('hidden');
+        $('#keyHighlightsContainer').classList.add('hidden');
     }
     // Entities
     $('#extractedText').innerHTML = '<p class="placeholder">No text extracted yet.</p>';
     $('#summaryContent').innerHTML = '<p class="placeholder">No summary available.</p>';
     $('#summaryStats').classList.add('hidden');
+    $('#keyHighlightsContainer').classList.add('hidden');
+    $('#highlightsList').innerHTML = '';
     $('#entityCategories').innerHTML = '<p class="placeholder">No entities detected.</p>';
     $('#entityList').innerHTML = '';
     $('#sentimentOverview').innerHTML = '<p class="placeholder">No sentiment data available.</p>';

static/index.html CHANGED Viewed

@@ -199,6 +199,10 @@
                         <div class="summary-content" id="summaryContent">
                             <p class="placeholder">No summary available.</p>
                         </div>
                         <div class="summary-stats hidden" id="summaryStats">
                             <div class="stat-card">
                                 <span class="stat-value" id="statOriginalLen">0</span>
@@ -263,6 +267,7 @@
     <!-- Toast Container -->
     <div class="toast-container" id="toastContainer"></div>
     <script src="/static/app.js"></script>
 </body>
 </html>

                         <div class="summary-content" id="summaryContent">
                             <p class="placeholder">No summary available.</p>
                         </div>
+                        <div class="key-highlights hidden" id="keyHighlightsContainer">
+                            <h4>Key Highlights</h4>
+                            <ul class="highlights-list" id="highlightsList"></ul>
+                        </div>
                         <div class="summary-stats hidden" id="summaryStats">
                             <div class="stat-card">
                                 <span class="stat-value" id="statOriginalLen">0</span>
     <!-- Toast Container -->
     <div class="toast-container" id="toastContainer"></div>
+    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
     <script src="/static/app.js"></script>
 </body>
 </html>

static/styles.css CHANGED Viewed

@@ -639,18 +639,58 @@ body {
 /* --- Text Content --- */
 .text-content, .summary-content {
-    padding: 24px;
     background: #ffffff;
     border: 1px solid var(--border-light);
     border-radius: var(--radius-lg);
-    color: var(--text-primary);
     box-shadow: var(--shadow-sm);
-    max-height: 500px;
     overflow-y: auto;
-    font-size: 0.9rem;
-    line-height: 1.8;
-    white-space: pre-wrap;
     word-wrap: break-word;
 }
 .summary-content {
@@ -1154,3 +1194,59 @@ body {
         padding: 3px 8px;
     }
 }

 /* --- Text Content --- */
 .text-content, .summary-content {
+    padding: 32px;
     background: #ffffff;
     border: 1px solid var(--border-light);
     border-radius: var(--radius-lg);
+    color: #334155; /* Slightly softer text color for reduced eye strain */
     box-shadow: var(--shadow-sm);
+    max-height: 600px;
     overflow-y: auto;
+    font-size: 1rem;
+    line-height: 1.7; /* Optimal readable line height */
     word-wrap: break-word;
+    text-align: left; /* Left alignment completely cures weird spacing gaps */
+    letter-spacing: 0.015em;
+    word-spacing: 0.05em; /* Smooth spacing between words */
+}
+/* Enhancing Markdown Elements */
+.text-content p, .summary-content p {
+    margin-bottom: 1.25em;
+}
+.text-content p:last-child, .summary-content p:last-child {
+    margin-bottom: 0;
+}
+.text-content h1, .summary-content h1,
+.text-content h2, .summary-content h2,
+.text-content h3, .summary-content h3 {
+    margin-top: 1.5em;
+    margin-bottom: 0.75em;
+    font-weight: 700;
+    color: var(--text-primary);
+}
+.text-content h1, .summary-content h1 { font-size: 1.5rem; }
+.text-content h2, .summary-content h2 { font-size: 1.35rem; }
+.text-content h3, .summary-content h3 { font-size: 1.15rem; }
+.text-content ul, .summary-content ul,
+.text-content ol, .summary-content ol {
+    margin-top: 0;
+    margin-bottom: 1.25em;
+    padding-left: 1.5em;
+}
+.text-content li, .summary-content li {
+    margin-bottom: 0.5em;
+}
+.text-content strong, .summary-content strong {
+    font-weight: 600;
+    color: var(--text-primary);
 }
 .summary-content {
         padding: 3px 8px;
     }
 }
+/* --- Key Highlights Styling --- */
+.key-highlights {
+    margin-top: 24px;
+    padding-top: 20px;
+}
+.key-highlights h4 {
+    font-size: 1rem;
+    font-weight: 700;
+    margin-bottom: 16px;
+    color: var(--accent-blue-deep);
+    display: flex;
+    align-items: center;
+    gap: 8px;
+}
+.key-highlights h4::before {
+    content: '✨';
+}
+.highlights-list {
+    list-style: none;
+    display: flex;
+    flex-direction: column;
+    gap: 12px;
+}
+.highlights-list li {
+    position: relative;
+    padding: 12px 16px 12px 42px;
+    background: var(--bg-primary);
+    border: 1px solid var(--border-light);
+    border-radius: var(--radius-md);
+    font-size: 0.9rem;
+    color: var(--text-primary);
+    line-height: 1.6;
+    transition: var(--transition-fast);
+}
+.highlights-list li:hover {
+    transform: translateX(4px);
+    border-color: var(--accent-blue-light);
+    background: var(--bg-secondary);
+    box-shadow: var(--shadow-sm);
+}
+.highlights-list li::before {
+    content: '→';
+    position: absolute;
+    left: 16px;
+    top: 50%;
+    transform: translateY(-50%);
+    color: var(--accent-blue);
+    font-weight: 800;
+}

test_gemini.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import os
+from dotenv import load_dotenv
+import google.generativeai as genai
+from PIL import Image
+load_dotenv()
+api_key = os.getenv("GEMINI_API_KEY")
+model_name = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
+print(f"API Key available: {bool(api_key)}")
+print(f"Model: {model_name}")
+try:
+    genai.configure(api_key=api_key)
+    model = genai.GenerativeModel(model_name)
+    # create a dummy image
+    img = Image.new('RGB', (100, 100), color = 'white')
+    response = model.generate_content(["What color is this?", img])
+    print("Success:", response.text)
+except Exception as e:
+    import traceback
+    traceback.print_exc()

test_gemini_vision.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import config
+import google.generativeai as genai
+from PIL import Image
+try:
+    img = Image.new('RGB', (100, 100), color = 'white')
+    genai.configure(api_key=config.GEMINI_API_KEY)
+    for model_name in ["gemini-1.5-flash", "gemini-2.0-flash-lite", "gemini-1.5-flash-8b"]:
+        try:
+            model = genai.GenerativeModel(model_name)
+            response = model.generate_content(["Tell me what is in this image", img])
+            print(f"SUCCESS with {model_name}:", response.text[:20])
+            break
+        except Exception as e:
+            print(f"FAILED {model_name}: {type(e).__name__} {str(e)[:50]}")
+except Exception as e:
+    print("Fatal exception:", e)

test_models.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import os
+from dotenv import load_dotenv
+import google.generativeai as genai
+from PIL import Image
+load_dotenv()
+api_key = os.getenv("GEMINI_API_KEY")
+genai.configure(api_key=api_key)
+for model_name in ["gemini-1.5-flash", "gemini-2.0-flash", "gemini-2.5-flash", "gemini-pro"]:
+    try:
+        print(f"Testing {model_name}...")
+        model = genai.GenerativeModel(model_name)
+        img = Image.new('RGB', (100, 100), color = 'white')
+        response = model.generate_content(["What color is this?", img])
+        print(f"{model_name} Success!")
+    except Exception as e:
+        print(f"{model_name} Failed: {type(e).__name__}")

test_ocr.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import config
+import google.generativeai as genai
+genai.configure(api_key=config.GEMINI_API_KEY)
+models = [m.name for m in genai.list_models()]
+print("Available models:")
+for m in models:
+    if 'flash' in m.lower() or '2.5' in m.lower() or '1.5' in m.lower():
+        print(m)

test_raw.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import os
+import requests
+from dotenv import load_dotenv
+load_dotenv()
+api_key = os.getenv("GEMINI_API_KEY")
+url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={api_key}"
+headers = {"Content-Type": "application/json"}
+data = {
+    "contents": [{"parts": [{"text": "Hello, world!"}]}]
+}
+response = requests.post(url, headers=headers, json=data)
+print(f"Status Code: {response.status_code}")
+print(f"Response Body:\n{response.text}")

test_raw2.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import os
+import requests
+import json
+from dotenv import load_dotenv
+load_dotenv()
+api_key = os.getenv("GEMINI_API_KEY")
+def test_api():
+    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={api_key}"
+    headers = {"Content-Type": "application/json"}
+    data = {
+        "contents": [{"parts": [{"text": "Hello, world!"}]}]
+    }
+    try:
+        response = requests.post(url, headers=headers, json=data)
+        with open("clean_output.txt", "w", encoding="utf-8") as f:
+            f.write(f"Status Code: {response.status_code}\n")
+            f.write(json.dumps(response.json(), indent=2))
+    except Exception as e:
+        with open("clean_output.txt", "w", encoding="utf-8") as f:
+            f.write(str(e))
+test_api()