nexusbert committed
Commit 2d6c16e · 1 Parent(s): 30507a7
Files changed (1)
  1. app.py +205 -11
app.py CHANGED
@@ -7,6 +7,7 @@ import warnings
 from pathlib import Path
 from typing import Optional, Tuple
 from fastapi import FastAPI, UploadFile, File, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from docx import Document as DocxDocument
@@ -19,10 +20,13 @@ import easyocr
 
 warnings.filterwarnings("ignore", category=UserWarning, module="pdfminer")
 warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")
+warnings.filterwarnings("ignore", message=".*Cannot set gray.*")
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+logging.getLogger("pdfminer").setLevel(logging.ERROR)
+
 from pdfminer.high_level import extract_text as extract_pdf_text
 
 app = FastAPI(
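A note on the new message filter: warnings.filterwarnings treats message as a regex matched against the start of the warning text, which is why the pattern needs the leading ".*" to catch "Cannot set gray" appearing mid-message. A minimal standalone check (not part of the commit):

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # "message" is a regex matched at the start of the warning text,
        # so the leading ".*" is what lets it match mid-string.
        warnings.filterwarnings("ignore", message=".*Cannot set gray.*")
        warnings.warn("PDF: Cannot set gray non-stroke color")  # suppressed
        warnings.warn("unrelated warning")                      # recorded
    assert len(caught) == 1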
@@ -31,6 +35,14 @@ app = FastAPI(
     version="1.0.0"
 )
 
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
 MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
 tokenizer = None
 model = None
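The CORS block opens the API to any origin. One quick way to confirm the preflight handling is FastAPI's test client; a sketch, assuming this file is importable as "app" (hypothetical test, not from the commit):

    from fastapi.testclient import TestClient
    from app import app  # assumes app.py is importable as "app"

    client = TestClient(app)
    # Browser-style preflight for a cross-origin POST; CORSMiddleware
    # answers this before routing, so no matching route is required.
    resp = client.options("/", headers={
        "Origin": "https://example.com",
        "Access-Control-Request-Method": "POST",
    })
    print(resp.status_code)                                 # 200 on success
    print(resp.headers.get("access-control-allow-origin"))  # allowed origin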
@@ -206,14 +218,80 @@ def extract_text_from_file(file_path: str, file_extension: str) -> str:
     else:
         raise ValueError(f"Unsupported file type: {extension}. Supported: PDF, DOCX, PPT/PPTX")
 
+def chunk_text(text: str, chunk_size: int = 6000, overlap: int = 500) -> list:
+    """
+    Split text into overlapping chunks for processing
+    """
+    if len(text) <= chunk_size:
+        return [text]
+
+    chunks = []
+    start = 0
+
+    while start < len(text):
+        end = start + chunk_size
+
+        if end >= len(text):
+            chunks.append(text[start:])
+            break
+
+        chunk_end = text.rfind('\n\n', start, end)
+        if chunk_end == -1:
+            chunk_end = text.rfind('\n', start, end)
+        if chunk_end == -1:
+            chunk_end = text.rfind('. ', start, end)
+        if chunk_end == -1:
+            chunk_end = end
+
+        chunks.append(text[start:chunk_end])
+        start = chunk_end - overlap
+        if start < 0:
+            start = 0
+
+    return chunks
+
 def review_pitchdeck(text: str) -> dict:
     """
     Send text to Zephyr model for VC-level review and return structured JSON
+    Uses chunking for long documents to improve processing speed
+    Zephyr-7b-beta has 4096 token context limit
     """
     if not text or not text.strip():
         raise ValueError("No text content provided for review")
 
-    deck_text = text[:12000]
+    max_tokens = 3800
+    estimated_chars_per_token = 4
+
+    max_text_length = int(max_tokens * estimated_chars_per_token * 0.8)
+
+    if len(text) > max_text_length:
+        logger.info(f"Text length ({len(text)} chars) exceeds safe limit ({max_text_length} chars), using chunking strategy")
+        chunks = chunk_text(text[:max_text_length], chunk_size=5000, overlap=500)
+
+        logger.info(f"Processing {len(chunks)} chunks...")
+
+        slide_reviews_combined = []
+        all_insights = []
+
+        for i, chunk in enumerate(chunks):
+            logger.info(f"Processing chunk {i+1}/{len(chunks)} ({len(chunk)} chars)...")
+            chunk_result = _review_chunk(chunk, is_partial=True, chunk_num=i+1, total_chunks=len(chunks))
+
+            if chunk_result.get("slide_reviews"):
+                slide_reviews_combined.extend(chunk_result["slide_reviews"])
+
+            if chunk_result.get("vc_insights"):
+                all_insights.append(chunk_result["vc_insights"])
+
+        logger.info("Combining chunk results into final review...")
+        return _combine_chunk_results(slide_reviews_combined, all_insights, text[:max_text_length])
+
+    deck_text = text[:6000]
+    return _review_chunk(deck_text, is_partial=False)
+
+def _review_chunk(deck_text: str, is_partial: bool = False, chunk_num: int = 1, total_chunks: int = 1) -> dict:
+
+    chunk_context = f"\n\n[Processing chunk {chunk_num} of {total_chunks} - focus on slides in this section]" if is_partial else ""
 
     system_message = """You are a senior venture capitalist with 15+ years of experience evaluating thousands of pitch decks. You know the patterns that lead to funding vs. ghosting. Based on extensive research analyzing hundreds of decks, these are the critical failure points:
 
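One hazard in chunk_text as committed: start = chunk_end - overlap steps the cursor backwards, so when the best break point sits within overlap characters of the current start (for example, a paragraph break near the very beginning of a window), start can land at or before its previous value and the loop repeats the same work, in the worst case forever. A defensive sketch with the same boundary preferences but guaranteed forward progress (chunk_text_safe is hypothetical, not part of the commit):

    def chunk_text_safe(text: str, chunk_size: int = 6000, overlap: int = 500) -> list:
        """Hypothetical variant of chunk_text that guarantees forward progress."""
        if len(text) <= chunk_size:
            return [text]
        chunks = []
        start = 0
        while start < len(text):
            end = start + chunk_size
            if end >= len(text):
                chunks.append(text[start:])
                break
            # Prefer paragraph breaks, then line breaks, then sentence ends,
            # but only search the back half of the window so the cut point
            # can never fall behind the current position.
            cut = -1
            for sep in ('\n\n', '\n', '. '):
                cut = text.rfind(sep, start + chunk_size // 2, end)
                if cut != -1:
                    break
            if cut == -1:
                cut = end
            chunks.append(text[start:cut])
            # Step back for overlap, but always advance past the previous start.
            start = max(cut - overlap, start + 1)
        return chunks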
@@ -237,11 +315,14 @@ THE 5 CRITICAL QUESTIONS every deck must answer clearly:
 
 Be brutally honest. Commercial clarity keeps doors open - GTM and financials get you funded. Emotion opens the door, but logic closes the deal."""
 
-    user_message = f"""Deck Content:
+    task_instruction = """TASK:
+Evaluate this deck against these real-world failure patterns. Check specifically for: commercial backbone, credible market sizing, GTM clarity, real traction metrics, team positioning, moat definition, specific ask, slide count/clarity, and financial logic.""" if not is_partial else """TASK:
+Review this section of the deck. Extract slide-by-slide analysis. Focus on identifying slide content, titles, and issues. Note: This is part of a larger deck - provide detailed slide reviews for this section only."""
+
+    user_message = f"""Deck Content{chunk_context}:
 {deck_text}
 
-TASK:
-Evaluate this deck against these real-world failure patterns. Check specifically for: commercial backbone, credible market sizing, GTM clarity, real traction metrics, team positioning, moat definition, specific ask, slide count/clarity, and financial logic.
+{task_instruction}
 
 Produce ONLY valid JSON with these exact fields:
 
@@ -306,15 +387,30 @@ Produce ONLY valid JSON with these exact fields:
         ]
 
         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=8192).to(model.device)
+
+        prompt_tokens = tokenizer.encode(prompt, return_tensors="pt")
+        prompt_token_count = prompt_tokens.shape[1]
+
+        max_input_tokens = 3800
+        max_output_tokens = 4096 - max_input_tokens
+
+        if prompt_token_count > max_input_tokens:
+            logger.warning(f"Prompt is {prompt_token_count} tokens, truncating to {max_input_tokens}")
+            prompt_tokens = prompt_tokens[:, :max_input_tokens]
+            prompt = tokenizer.decode(prompt_tokens[0], skip_special_tokens=True)
+
+        logger.info(f"Input tokens: ~{prompt_token_count}, Max output tokens: {max_output_tokens}")
+
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(model.device)
 
         outputs = model.generate(
             **inputs,
-            max_new_tokens=2000,
+            max_new_tokens=min(1500, max_output_tokens),
             temperature=0.3,
             do_sample=True,
-            top_p=0.9,
-            pad_token_id=tokenizer.eos_token_id
+            top_p=0.95,
+            pad_token_id=tokenizer.eos_token_id,
+            use_cache=True
         )
         raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
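The token budget above is worth making concrete: the character cap works out to 12160, and because only 296 of the 4096 context tokens are left for generation, min(1500, max_output_tokens) caps every response at 296 new tokens, which may be tight for the full JSON schema requested. The arithmetic, mirroring the constants in the diff:

    # Worked numbers behind the token budget (same constants as above).
    max_tokens = 3800
    estimated_chars_per_token = 4
    max_text_length = int(max_tokens * estimated_chars_per_token * 0.8)
    print(max_text_length)               # 12160 chars of deck text at most

    max_input_tokens = 3800
    max_output_tokens = 4096 - max_input_tokens
    print(max_output_tokens)             # 296
    print(min(1500, max_output_tokens))  # 296 -> effective max_new_tokens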
@@ -337,6 +433,90 @@ Produce ONLY valid JSON with these exact fields:
         logger.error(f"Model generation error: {e}")
         raise ValueError(f"Error during model inference: {str(e)}")
 
+def _combine_chunk_results(slide_reviews: list, insights: list, full_text: str) -> dict:
+    """
+    Combine results from multiple chunks into a single comprehensive review
+    """
+    system_message = """You are synthesizing multiple partial reviews of a pitch deck into one comprehensive VC evaluation."""
+
+    user_message = f"""You have received partial reviews of a pitch deck. Combine them into one final comprehensive review.
+
+Slide Reviews from chunks:
+{json.dumps(slide_reviews[:50], indent=2)}
+
+Key Insights:
+{json.dumps(insights, indent=2)}
+
+Full Deck Length: {len(full_text)} characters
+
+Produce a FINAL comprehensive review with the same JSON structure as before, consolidating all findings."""
+
+    try:
+        messages = [
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": user_message}
+        ]
+
+        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+        prompt_tokens = tokenizer.encode(prompt, return_tensors="pt")
+        prompt_token_count = prompt_tokens.shape[1]
+
+        max_input_tokens = 3800
+        max_output_tokens = 4096 - max_input_tokens
+
+        if prompt_token_count > max_input_tokens:
+            logger.warning(f"Combine prompt is {prompt_token_count} tokens, truncating to {max_input_tokens}")
+            prompt_tokens = prompt_tokens[:, :max_input_tokens]
+            prompt = tokenizer.decode(prompt_tokens[0], skip_special_tokens=True)
+
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(model.device)
+
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=min(1500, max_output_tokens),
+            temperature=0.3,
+            do_sample=True,
+            top_p=0.95,
+            pad_token_id=tokenizer.eos_token_id,
+            use_cache=True
+        )
+        raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        if "<|assistant|>" in raw_output:
+            raw_output = raw_output.split("<|assistant|>")[-1]
+
+        start = raw_output.find('{')
+        end = raw_output.rfind('}') + 1
+
+        if start == -1 or end == 0:
+            logger.warning("Failed to parse combined result, returning basic structure")
+            return {
+                "verdict": "Follow-up",
+                "score": 70,
+                "grade": "B",
+                "top_line": "Deck reviewed across multiple sections",
+                "slide_reviews": slide_reviews[:20],
+                "note": "Combined from chunked processing"
+            }
+
+        combined_json = json.loads(raw_output[start:end])
+
+        if slide_reviews and not combined_json.get("slide_reviews"):
+            combined_json["slide_reviews"] = slide_reviews[:30]
+
+        return combined_json
+
+    except Exception as e:
+        logger.warning(f"Combining chunks failed: {e}, returning first chunk result")
+        return {
+            "verdict": "Follow-up",
+            "score": 70,
+            "grade": "B",
+            "top_line": "Deck processed in chunks",
+            "slide_reviews": slide_reviews[:20] if slide_reviews else []
+        }
+
 def generate_improvement_pointers(review: dict) -> dict:
     """Generate specific improvement pointers for decks below 80% or lacking clarity"""
     score = review.get("score", 0)
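_combine_chunk_results recovers JSON by slicing from the first '{' to the last '}' in the decoded output, with a canned fallback when nothing parses. The same pattern isolated as a helper, for illustration (extract_json_block is hypothetical, not in the commit):

    import json

    def extract_json_block(raw_output: str, fallback: dict) -> dict:
        """Best-effort JSON extraction from free-form model output."""
        if "<|assistant|>" in raw_output:
            # Keep only the assistant turn of the chat transcript.
            raw_output = raw_output.split("<|assistant|>")[-1]
        start = raw_output.find('{')
        end = raw_output.rfind('}') + 1
        if start == -1 or end == 0:
            return fallback
        try:
            return json.loads(raw_output[start:end])
        except json.JSONDecodeError:
            return fallback

    print(extract_json_block('noise {"score": 70} trailing', {"score": 0}))
    # {'score': 70}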
@@ -399,14 +579,28 @@ Return ONLY valid JSON:
         ]
 
         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=8192).to(model.device)
+
+        prompt_tokens = tokenizer.encode(prompt, return_tensors="pt")
+        prompt_token_count = prompt_tokens.shape[1]
+
+        max_input_tokens = 3800
+        max_output_tokens = 4096 - max_input_tokens
+
+        if prompt_token_count > max_input_tokens:
+            logger.warning(f"Improvement prompt is {prompt_token_count} tokens, truncating to {max_input_tokens}")
+            prompt_tokens = prompt_tokens[:, :max_input_tokens]
+            prompt = tokenizer.decode(prompt_tokens[0], skip_special_tokens=True)
+
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(model.device)
 
        outputs = model.generate(
             **inputs,
-            max_new_tokens=1500,
+            max_new_tokens=min(1000, max_output_tokens),
             temperature=0.4,
             do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
+            top_p=0.95,
+            pad_token_id=tokenizer.eos_token_id,
+            use_cache=True
         )
         raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
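Finally, the 4-characters-per-token estimate that drives all the truncation above can be sanity-checked against the real tokenizer; a sketch, assuming the tokenizer files can be downloaded:

    # Sanity check of the ~4 chars/token heuristic against the actual tokenizer.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
    sample = "Our SaaS platform reduces onboarding time by 40% for mid-market banks. " * 50
    tokens = tokenizer.encode(sample)
    print(len(sample) / len(tokens))  # typically around 3.5-4.5 chars/token for English prose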