nexusbert committed
Commit 2d6c16e · 1 Parent(s): 30507a7
Files changed (1)
  1. app.py +205 -11
app.py CHANGED
@@ -7,6 +7,7 @@ import warnings
 from pathlib import Path
 from typing import Optional, Tuple
 from fastapi import FastAPI, UploadFile, File, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from docx import Document as DocxDocument
@@ -19,10 +20,13 @@ import easyocr
 
 warnings.filterwarnings("ignore", category=UserWarning, module="pdfminer")
 warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")
+warnings.filterwarnings("ignore", message=".*Cannot set gray.*")
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+logging.getLogger("pdfminer").setLevel(logging.ERROR)
+
 from pdfminer.high_level import extract_text as extract_pdf_text
 
 app = FastAPI(
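A note on the new message filter: warnings.filterwarnings treats message as a regex matched against the start of the warning text, which is why the pattern needs the leading ".*" to catch "Cannot set gray" appearing mid-message. A minimal standalone check (not part of the commit):

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # "message" is a regex matched at the start of the warning text,
        # so the leading ".*" is what lets it match mid-string.
        warnings.filterwarnings("ignore", message=".*Cannot set gray.*")
        warnings.warn("PDF: Cannot set gray non-stroke color")  # suppressed
        warnings.warn("unrelated warning")                      # recorded
    assert len(caught) == 1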
@@ -31,6 +35,14 @@ app = FastAPI(
     version="1.0.0"
 )
 
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
 MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
 tokenizer = None
 model = None
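The CORS block opens the API to any origin. One quick way to confirm the preflight handling is FastAPI's test client; a sketch, assuming this file is importable as "app" (hypothetical test, not from the commit):

    from fastapi.testclient import TestClient
    from app import app  # assumes app.py is importable as "app"

    client = TestClient(app)
    # Browser-style preflight for a cross-origin POST; CORSMiddleware
    # answers this before routing, so no matching route is required.
    resp = client.options("/", headers={
        "Origin": "https://example.com",
        "Access-Control-Request-Method": "POST",
    })
    print(resp.status_code)                                 # 200 on success
    print(resp.headers.get("access-control-allow-origin"))  # allowed origin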
@@ -206,14 +218,80 @@ def extract_text_from_file(file_path: str, file_extension: str) -> str:
     else:
         raise ValueError(f"Unsupported file type: {extension}. Supported: PDF, DOCX, PPT/PPTX")
 
+def chunk_text(text: str, chunk_size: int = 6000, overlap: int = 500) -> list:
+    """
+    Split text into overlapping chunks for processing
+    """
+    if len(text) <= chunk_size:
+        return [text]
+
+    chunks = []
+    start = 0
+
+    while start < len(text):
+        end = start + chunk_size
+
+        if end >= len(text):
+            chunks.append(text[start:])
+            break
+
+        chunk_end = text.rfind('\n\n', start, end)
+        if chunk_end == -1:
+            chunk_end = text.rfind('\n', start, end)
+        if chunk_end == -1:
+            chunk_end = text.rfind('. ', start, end)
+        if chunk_end == -1:
+            chunk_end = end
+
+        chunks.append(text[start:chunk_end])
+        start = chunk_end - overlap
+        if start < 0:
+            start = 0
+
+    return chunks
+
 def review_pitchdeck(text: str) -> dict:
     """
     Send text to Zephyr model for VC-level review and return structured JSON
+    Uses chunking for long documents to improve processing speed
+    Zephyr-7b-beta has 4096 token context limit
     """
     if not text or not text.strip():
         raise ValueError("No text content provided for review")
 
-    deck_text = text[:12000]
+    max_tokens = 3800
+    estimated_chars_per_token = 4
+
+    max_text_length = int(max_tokens * estimated_chars_per_token * 0.8)
+
+    if len(text) > max_text_length:
+        logger.info(f"Text length ({len(text)} chars) exceeds safe limit ({max_text_length} chars), using chunking strategy")
+        chunks = chunk_text(text[:max_text_length], chunk_size=5000, overlap=500)
+
+        logger.info(f"Processing {len(chunks)} chunks...")
+
+        slide_reviews_combined = []
+        all_insights = []
+
+        for i, chunk in enumerate(chunks):
+            logger.info(f"Processing chunk {i+1}/{len(chunks)} ({len(chunk)} chars)...")
+            chunk_result = _review_chunk(chunk, is_partial=True, chunk_num=i+1, total_chunks=len(chunks))
+
+            if chunk_result.get("slide_reviews"):
+                slide_reviews_combined.extend(chunk_result["slide_reviews"])
+
+            if chunk_result.get("vc_insights"):
+                all_insights.append(chunk_result["vc_insights"])
+
+        logger.info("Combining chunk results into final review...")
+        return _combine_chunk_results(slide_reviews_combined, all_insights, text[:max_text_length])
+
+    deck_text = text[:6000]
+    return _review_chunk(deck_text, is_partial=False)
+
+def _review_chunk(deck_text: str, is_partial: bool = False, chunk_num: int = 1, total_chunks: int = 1) -> dict:
+
+    chunk_context = f"\n\n[Processing chunk {chunk_num} of {total_chunks} - focus on slides in this section]" if is_partial else ""
 
     system_message = """You are a senior venture capitalist with 15+ years of experience evaluating thousands of pitch decks. You know the patterns that lead to funding vs. ghosting. Based on extensive research analyzing hundreds of decks, these are the critical failure points:
 
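One hazard in chunk_text as committed: start = chunk_end - overlap steps the cursor backwards, so when the best break point sits within overlap characters of the current start (for example, a paragraph break near the very beginning of a window), start can land at or before its previous value and the loop repeats the same work, in the worst case forever. A defensive sketch with the same boundary preferences but guaranteed forward progress (chunk_text_safe is hypothetical, not part of the commit):

    def chunk_text_safe(text: str, chunk_size: int = 6000, overlap: int = 500) -> list:
        """Hypothetical variant of chunk_text that guarantees forward progress."""
        if len(text) <= chunk_size:
            return [text]
        chunks = []
        start = 0
        while start < len(text):
            end = start + chunk_size
            if end >= len(text):
                chunks.append(text[start:])
                break
            # Prefer paragraph breaks, then line breaks, then sentence ends,
            # but only search the back half of the window so the cut point
            # can never fall behind the current position.
            cut = -1
            for sep in ('\n\n', '\n', '. '):
                cut = text.rfind(sep, start + chunk_size // 2, end)
                if cut != -1:
                    break
            if cut == -1:
                cut = end
            chunks.append(text[start:cut])
            # Step back for overlap, but always advance past the previous start.
            start = max(cut - overlap, start + 1)
        return chunks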
@@ -237,11 +315,14 @@ THE 5 CRITICAL QUESTIONS every deck must answer clearly:
 
 Be brutally honest. Commercial clarity keeps doors open - GTM and financials get you funded. Emotion opens the door, but logic closes the deal."""
 
-    user_message = f"""Deck Content:
+    task_instruction = """TASK:
+Evaluate this deck against these real-world failure patterns. Check specifically for: commercial backbone, credible market sizing, GTM clarity, real traction metrics, team positioning, moat definition, specific ask, slide count/clarity, and financial logic.""" if not is_partial else """TASK:
+Review this section of the deck. Extract slide-by-slide analysis. Focus on identifying slide content, titles, and issues. Note: This is part of a larger deck - provide detailed slide reviews for this section only."""
+
+    user_message = f"""Deck Content{chunk_context}:
 {deck_text}
 
-TASK:
-Evaluate this deck against these real-world failure patterns. Check specifically for: commercial backbone, credible market sizing, GTM clarity, real traction metrics, team positioning, moat definition, specific ask, slide count/clarity, and financial logic.
+{task_instruction}
 
 Produce ONLY valid JSON with these exact fields:
 
@@ -306,15 +387,30 @@ Produce ONLY valid JSON with these exact fields:
         ]
 
         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=8192).to(model.device)
+
+        prompt_tokens = tokenizer.encode(prompt, return_tensors="pt")
+        prompt_token_count = prompt_tokens.shape[1]
+
+        max_input_tokens = 3800
+        max_output_tokens = 4096 - max_input_tokens
+
+        if prompt_token_count > max_input_tokens:
+            logger.warning(f"Prompt is {prompt_token_count} tokens, truncating to {max_input_tokens}")
+            prompt_tokens = prompt_tokens[:, :max_input_tokens]
+            prompt = tokenizer.decode(prompt_tokens[0], skip_special_tokens=True)
+
+        logger.info(f"Input tokens: ~{prompt_token_count}, Max output tokens: {max_output_tokens}")
+
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(model.device)
 
         outputs = model.generate(
             **inputs,
-            max_new_tokens=2000,
+            max_new_tokens=min(1500, max_output_tokens),
             temperature=0.3,
             do_sample=True,
-            top_p=0.9,
-            pad_token_id=tokenizer.eos_token_id
+            top_p=0.95,
+            pad_token_id=tokenizer.eos_token_id,
+            use_cache=True
         )
         raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
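The token budget above is worth making concrete: the character cap works out to 12160, and because only 296 of the 4096 context tokens are left for generation, min(1500, max_output_tokens) caps every response at 296 new tokens, which may be tight for the full JSON schema requested. The arithmetic, mirroring the constants in the diff:

    # Worked numbers behind the token budget (same constants as above).
    max_tokens = 3800
    estimated_chars_per_token = 4
    max_text_length = int(max_tokens * estimated_chars_per_token * 0.8)
    print(max_text_length)               # 12160 chars of deck text at most

    max_input_tokens = 3800
    max_output_tokens = 4096 - max_input_tokens
    print(max_output_tokens)             # 296
    print(min(1500, max_output_tokens))  # 296 -> effective max_new_tokens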
@@ -337,6 +433,90 @@ Produce ONLY valid JSON with these exact fields:
         logger.error(f"Model generation error: {e}")
         raise ValueError(f"Error during model inference: {str(e)}")
 
+def _combine_chunk_results(slide_reviews: list, insights: list, full_text: str) -> dict:
+    """
+    Combine results from multiple chunks into a single comprehensive review
+    """
+    system_message = """You are synthesizing multiple partial reviews of a pitch deck into one comprehensive VC evaluation."""
+
+    user_message = f"""You have received partial reviews of a pitch deck. Combine them into one final comprehensive review.
+
+Slide Reviews from chunks:
+{json.dumps(slide_reviews[:50], indent=2)}
+
+Key Insights:
+{json.dumps(insights, indent=2)}
+
+Full Deck Length: {len(full_text)} characters
+
+Produce a FINAL comprehensive review with the same JSON structure as before, consolidating all findings."""
+
+    try:
+        messages = [
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": user_message}
+        ]
+
+        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+        prompt_tokens = tokenizer.encode(prompt, return_tensors="pt")
+        prompt_token_count = prompt_tokens.shape[1]
+
+        max_input_tokens = 3800
+        max_output_tokens = 4096 - max_input_tokens
+
+        if prompt_token_count > max_input_tokens:
+            logger.warning(f"Combine prompt is {prompt_token_count} tokens, truncating to {max_input_tokens}")
+            prompt_tokens = prompt_tokens[:, :max_input_tokens]
+            prompt = tokenizer.decode(prompt_tokens[0], skip_special_tokens=True)
+
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(model.device)
+
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=min(1500, max_output_tokens),
+            temperature=0.3,
+            do_sample=True,
+            top_p=0.95,
+            pad_token_id=tokenizer.eos_token_id,
+            use_cache=True
+        )
+        raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        if "<|assistant|>" in raw_output:
+            raw_output = raw_output.split("<|assistant|>")[-1]
+
+        start = raw_output.find('{')
+        end = raw_output.rfind('}') + 1
+
+        if start == -1 or end == 0:
+            logger.warning("Failed to parse combined result, returning basic structure")
+            return {
+                "verdict": "Follow-up",
+                "score": 70,
+                "grade": "B",
+                "top_line": "Deck reviewed across multiple sections",
+                "slide_reviews": slide_reviews[:20],
+                "note": "Combined from chunked processing"
+            }
+
+        combined_json = json.loads(raw_output[start:end])
+
+        if slide_reviews and not combined_json.get("slide_reviews"):
+            combined_json["slide_reviews"] = slide_reviews[:30]
+
+        return combined_json
+
+    except Exception as e:
+        logger.warning(f"Combining chunks failed: {e}, returning first chunk result")
+        return {
+            "verdict": "Follow-up",
+            "score": 70,
+            "grade": "B",
+            "top_line": "Deck processed in chunks",
+            "slide_reviews": slide_reviews[:20] if slide_reviews else []
+        }
+
 def generate_improvement_pointers(review: dict) -> dict:
     """Generate specific improvement pointers for decks below 80% or lacking clarity"""
     score = review.get("score", 0)
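_combine_chunk_results recovers JSON by slicing from the first '{' to the last '}' in the decoded output, with a canned fallback when nothing parses. The same pattern isolated as a helper, for illustration (extract_json_block is hypothetical, not in the commit):

    import json

    def extract_json_block(raw_output: str, fallback: dict) -> dict:
        """Best-effort JSON extraction from free-form model output."""
        if "<|assistant|>" in raw_output:
            # Keep only the assistant turn of the chat transcript.
            raw_output = raw_output.split("<|assistant|>")[-1]
        start = raw_output.find('{')
        end = raw_output.rfind('}') + 1
        if start == -1 or end == 0:
            return fallback
        try:
            return json.loads(raw_output[start:end])
        except json.JSONDecodeError:
            return fallback

    print(extract_json_block('noise {"score": 70} trailing', {"score": 0}))
    # {'score': 70}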
@@ -399,14 +579,28 @@ Return ONLY valid JSON:
         ]
 
         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=8192).to(model.device)
+
+        prompt_tokens = tokenizer.encode(prompt, return_tensors="pt")
+        prompt_token_count = prompt_tokens.shape[1]
+
+        max_input_tokens = 3800
+        max_output_tokens = 4096 - max_input_tokens
+
+        if prompt_token_count > max_input_tokens:
+            logger.warning(f"Improvement prompt is {prompt_token_count} tokens, truncating to {max_input_tokens}")
+            prompt_tokens = prompt_tokens[:, :max_input_tokens]
+            prompt = tokenizer.decode(prompt_tokens[0], skip_special_tokens=True)
+
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(model.device)
 
        outputs = model.generate(
             **inputs,
-            max_new_tokens=1500,
+            max_new_tokens=min(1000, max_output_tokens),
             temperature=0.4,
             do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
+            top_p=0.95,
+            pad_token_id=tokenizer.eos_token_id,
+            use_cache=True
         )
         raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
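Finally, the 4-characters-per-token estimate that drives all the truncation above can be sanity-checked against the real tokenizer; a sketch, assuming the tokenizer files can be downloaded:

    # Sanity check of the ~4 chars/token heuristic against the actual tokenizer.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
    sample = "Our SaaS platform reduces onboarding time by 40% for mid-market banks. " * 50
    tokens = tokenizer.encode(sample)
    print(len(sample) / len(tokens))  # typically around 3.5-4.5 chars/token for English prose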