AlyanAkram commited on
Commit
2a668e8
·
verified ·
1 Parent(s): 9aef97b

Delete app

Browse files
app/.gitignore DELETED
@@ -1,43 +0,0 @@
1
- # Python artifacts
2
- __pycache__/
3
- *.py[cod]
4
- *.pyo
5
- *.pyd
6
- *.so
7
- *.egg-info/
8
-
9
- # Virtual environments
10
- venv/
11
- env/
12
- .venv/
13
-
14
- # IDE/editor files
15
- .vscode/
16
- .idea/
17
- *.log
18
- .DS_Store
19
- Thumbs.db
20
-
21
- # Model weights and training artifacts
22
- detector/models/
23
- detector/training_data/
24
- detector/output_reports/
25
- test_files/
26
- reports/
27
- *.safetensors
28
- *.bin
29
- *.pt
30
- *.ckpt
31
-
32
- # Data files
33
- ai_training_dataset.json
34
-
35
- # Optional build/deploy stuff
36
- build/
37
- tmp/
38
- .cache/
39
-
40
- # Environment and config
41
- .env
42
- *.env
43
- .env.*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/auth.py DELETED
@@ -1,37 +0,0 @@
1
import os

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from supabase import create_client, Client

# SECURITY FIX: the previous revision hard-coded the Supabase project URL and
# anon JWT directly in source control. Credentials now come from the
# environment; the leaked key should be rotated in the Supabase dashboard.
SUPABASE_URL = os.getenv("SUPABASE_URL", "https://ylyxgffttgvvjyrfovpl.supabase.co")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
if not SUPABASE_KEY:
    raise RuntimeError("SUPABASE_KEY environment variable is not set")

# Module-level client shared by both endpoints.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

router = APIRouter()


class UserAuth(BaseModel):
    """Request body for both /signup and /login."""
    email: str
    password: str


@router.post("/signup")
def signup(user: UserAuth):
    """Register a new user with Supabase auth.

    Returns a success message plus the created user record; any Supabase
    error is surfaced as a 400 with the provider's message.
    """
    try:
        result = supabase.auth.sign_up({
            "email": user.email,
            "password": user.password
        })
        return {"message": "Signup successful", "user": result.user}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))


@router.post("/login")
def login(user: UserAuth):
    """Authenticate a user and return their access token and user id.

    Deliberately returns a generic 401 (no provider detail) so the response
    does not reveal whether the email exists.
    """
    try:
        result = supabase.auth.sign_in_with_password({
            "email": user.email,
            "password": user.password
        })
        return {"access_token": result.session.access_token, "user_id": result.user.id}
    except Exception as e:
        raise HTTPException(status_code=401, detail="Invalid credentials")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/detector/create_dataset.py DELETED
@@ -1,45 +0,0 @@
1
import os
import json
from preprocess import extract_paragraphs

# Folder name (lower-cased) -> integer class label.
LABELS = {
    "ai": 1,
    "human": 0,
    "mixed": 2  # third class: documents mixing human and AI text
}

root_dir = "training_data"
dataset = []

for label_folder in os.listdir(root_dir):
    folder_path = os.path.join(root_dir, label_folder)
    if not os.path.isdir(folder_path):
        continue

    label = LABELS.get(label_folder.lower())
    if label is None:
        # Skip folders that do not map to a known class.
        continue

    for filename in os.listdir(folder_path):
        # BUG FIX: extension check is now case-insensitive, so files named
        # "REPORT.PDF" or "essay.DOCX" are no longer silently skipped.
        if not filename.lower().endswith((".pdf", ".docx")):
            continue

        file_path = os.path.join(folder_path, filename)
        print(f"📄 Extracting: {file_path}")
        try:
            # One training sample per non-empty paragraph.
            for para in extract_paragraphs(file_path):
                if para.strip():
                    dataset.append({
                        "text": para.strip(),
                        "label": label
                    })
        except Exception as e:
            print(f"❌ Failed: {file_path} — {str(e)}")

# Save dataset
with open("ai_training_dataset.json", "w", encoding="utf-8") as f:
    json.dump(dataset, f, indent=2, ensure_ascii=False)

print(f"\n✅ Saved {len(dataset)} samples.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/detector/custom_model.py DELETED
@@ -1,149 +0,0 @@
1
import os
import torch
import nltk
from pathlib import Path
from nltk.tokenize import sent_tokenize
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib import colors

# === NLTK tokenizer ===
# Download the "punkt" sentence-splitter data used by sent_tokenize below.
nltk.download("punkt")

# === Model loading: Hugging Face (Render) vs Local (Dev) ===
# USE_HF_MODEL=1 selects the hosted checkpoint; anything else loads the
# local checkout under ./detector/models (development).
USE_HF_MODEL = os.getenv("USE_HF_MODEL") == "1"

if USE_HF_MODEL:
    from huggingface_hub import login

    # HF_TOKEN is optional; without it the hub model must be public.
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        login(token=hf_token)

    MODEL_PATH = "AlyanAkram/stealth-roberta"
    tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, token=hf_token)
    model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, token=hf_token)
else:
    MODEL_PATH = "./detector/models/roberta-detector"
    # local_files_only prevents an accidental network fetch in dev.
    tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
    model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, local_files_only=True)

# Inference-only: eval mode, on GPU when available.
model.eval().to("cuda" if torch.cuda.is_available() else "cpu")
device = next(model.parameters()).device


# === AI classification threshold ===
# Sentences with P(AI) at or above this are flagged as AI-written.
AI_THRESHOLD = 0.5

# === Report directory ===
# PDF reports are written next to the app package, in ./reports.
REPORT_DIR = Path(__file__).resolve().parent.parent / "reports"
REPORT_DIR.mkdir(exist_ok=True)
42
-
43
def analyze_text(text: str):
    """Score every sentence of *text* with the RoBERTa detector.

    The text is split on newlines into paragraphs (blank lines dropped),
    each paragraph into sentences. Returns a dict with the overall AI
    percentage, sentence counts, and per-paragraph lists of
    (sentence, is_ai, ai_probability) tuples.
    """
    flagged = 0
    seen = 0
    per_paragraph = []

    for block in text.split("\n"):
        block = block.strip()
        if not block:
            continue

        scored = []
        for sent in sent_tokenize(block):
            encoded = tokenizer(sent, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)

            with torch.no_grad():
                logits = model(**encoded).logits
                p_ai = torch.nn.functional.softmax(logits, dim=-1)[0][1].item()

            hit = p_ai >= AI_THRESHOLD
            scored.append((sent, hit, p_ai))

            seen += 1
            if hit:
                flagged += 1

        per_paragraph.append(scored)

    percent = round((flagged / seen) * 100, 2) if seen else 0

    return {
        "overall_ai_percent": percent,
        "total_sentences": seen,
        "ai_sentences": flagged,
        "results": per_paragraph
    }
77
-
78
def _draw_report_line(c, x, y, line, is_ai, font_size, line_height):
    """Draw one line of report text, with a cyan highlight behind AI-flagged text."""
    if is_ai:
        text_width = c.stringWidth(line, "Helvetica", font_size)
        c.setFillColor(colors.cyan)
        c.rect(x - 2, y - 4, text_width + 4, line_height + 2, fill=True, stroke=False)
        c.setFillColor(colors.black)
    c.drawString(x, y, line)


def generate_pdf_report(results: dict, filename: str) -> str:
    """Render analyze_text() output into REPORT_DIR/<filename>.pdf.

    AI-flagged sentences are highlighted in cyan; long sentences are
    word-wrapped to the page width. Returns the report file name
    (relative to REPORT_DIR).
    """
    # BUG FIX: the previous version ignored the `filename` argument and always
    # wrote the same literal "(unknown).pdf", so concurrent requests clobbered
    # each other's reports. The parameter is now actually used.
    pdf_path = REPORT_DIR / f"{filename}.pdf"

    c = canvas.Canvas(str(pdf_path), pagesize=A4)
    width, height = A4
    x, y = 40, height - 60
    line_height = 18
    font_size = 12

    # Header
    c.setFont("Helvetica-Bold", 14)
    c.drawString(x, y, f"📄 AI Detection Report: {filename}")
    y -= 25
    c.setFont("Helvetica", 12)
    c.drawString(x, y, f"🧠 AI Detected: {results['overall_ai_percent']}% of {results['total_sentences']} sentences")
    y -= 30
    c.setFont("Helvetica", font_size)

    for para_result in results["results"]:
        if not para_result:
            # Empty paragraph keeps its vertical space.
            y -= line_height
            continue

        for sentence, is_ai, _ in para_result:
            if y < 50:
                # Out of room: start a new page.
                c.showPage()
                y = height - 50
                c.setFont("Helvetica", font_size)

            sentence = sentence.strip()
            if not sentence:
                continue

            # Greedy word-wrap against the printable width.
            max_width = width - 80
            current_line = ""
            for word in sentence.split():
                test_line = current_line + " " + word if current_line else word
                if c.stringWidth(test_line, "Helvetica", font_size) > max_width:
                    _draw_report_line(c, x, y, current_line, is_ai, font_size, line_height)
                    y -= line_height
                    current_line = word
                else:
                    current_line = test_line

            if current_line:
                if y < 50:
                    c.showPage()
                    y = height - 50
                    c.setFont("Helvetica", font_size)
                _draw_report_line(c, x, y, current_line, is_ai, font_size, line_height)
                y -= line_height

        y -= line_height  # Paragraph spacing

    c.save()
    return f"{filename}.pdf"
149
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/detector/detector.py DELETED
@@ -1,112 +0,0 @@
1
import os
import sys
import torch
import docx
import nltk
from nltk.tokenize import sent_tokenize
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib import colors

# Sentence-splitter data for sent_tokenize.
nltk.download("punkt")

# Load model (inference only, GPU when available)
model_dir = "./models/roberta-detector"
tokenizer = RobertaTokenizer.from_pretrained(model_dir)
model = RobertaForSequenceClassification.from_pretrained(model_dir)
model.eval().to("cuda" if torch.cuda.is_available() else "cpu")
device = next(model.parameters()).device

# === THRESHOLD CONFIG ===
AI_THRESHOLD = 0.50  # Adjust this as needed for better results

# === Input File ===
# Usage: python detector.py <path-to-docx>
filepath = sys.argv[1]
filename = os.path.splitext(os.path.basename(filepath))[0]
output_dir = "output_reports"
os.makedirs(output_dir, exist_ok=True)
# BUG FIX: `filename` was computed but never used — the report path was the
# hard-coded literal "(unknown)_report.pdf", so every run overwrote the same
# file. The report is now named after the input document.
output_path = os.path.join(output_dir, f"{filename}_report.pdf")

# === DOCX Reader ===
def read_docx_paragraphs(path):
    """Return the raw paragraph texts of a .docx file (empty strings kept)."""
    doc = docx.Document(path)
    return [para.text for para in doc.paragraphs]

paragraphs = read_docx_paragraphs(filepath)

# === Detection Loop ===
results = []
total_sentences = 0
ai_sentences = 0

for paragraph in paragraphs:
    if not paragraph.strip():
        results.append([])  # preserve spacing
        continue

    sentences = sent_tokenize(paragraph)
    para_result = []

    for sentence in sentences:
        inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
            ai_prob = probs[1].item()  # index 1 = AI class

        is_ai = ai_prob >= AI_THRESHOLD
        para_result.append((sentence, is_ai, ai_prob))

        total_sentences += 1
        if is_ai:
            ai_sentences += 1

        # Debugging
        print(f"[DEBUG] AI probability: {ai_prob:.2f} — {'✔ Highlight' if is_ai else '✘ Skip'}")

    results.append(para_result)

ai_percent = round((ai_sentences / total_sentences) * 100, 2) if total_sentences else 0

# === PDF Writer ===
c = canvas.Canvas(output_path, pagesize=A4)
width, height = A4
x, y = 40, height - 60
line_height = 18
font_size = 12

# Title — BUG FIX: now shows the actual document name instead of a literal.
c.setFont("Helvetica-Bold", 14)
c.drawString(x, y, f"📄 AI Detection Report: {filename}")
y -= 25
c.setFont("Helvetica", 12)
c.drawString(x, y, f"🧠 AI Detected: {ai_percent}% of {total_sentences} sentences")
y -= 30
c.setFont("Helvetica", font_size)

# Body rendering: one line per sentence, cyan highlight behind AI sentences.
for para_result in results:
    if not para_result:
        y -= line_height
        continue

    for sentence, is_ai, ai_prob in para_result:
        if y < 50:
            c.showPage()
            y = height - 50
            c.setFont("Helvetica", font_size)

        if is_ai:
            text_width = c.stringWidth(sentence, "Helvetica", font_size)
            c.setFillColor(colors.cyan)
            c.rect(x - 2, y - 4, text_width + 4, line_height + 2, fill=True, stroke=False)
            c.setFillColor(colors.black)

        c.drawString(x, y, sentence)
        y -= line_height

    y -= line_height  # spacing between paragraphs

c.save()
print(f"\n✅ Report saved: {output_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/detector/preprocess.py DELETED
@@ -1,47 +0,0 @@
1
- import os
2
- import docx
3
- import pdfplumber
4
- import nltk
5
-
6
- nltk.download("punkt")
7
- from nltk.tokenize import sent_tokenize
8
-
9
def extract_text_from_docx(path):
    """Return the non-empty, stripped paragraphs of a .docx file.

    Extraction failures are logged and yield an empty list.
    """
    try:
        document = docx.Document(path)
        return [p.text.strip() for p in document.paragraphs if p.text.strip()]
    except Exception as e:
        print(f"❌ Failed to extract DOCX: {e}")
        return []
17
-
18
def extract_text_from_pdf(path):
    """Extract paragraph-like text chunks from a PDF.

    Prefers blank-line-separated paragraphs; falls back to 5-sentence
    groups when none are found. Returns [] on failure or empty output.
    """
    try:
        with pdfplumber.open(path) as pdf:
            text = "\n".join(
                page.extract_text() for page in pdf.pages if page.extract_text()
            )
    except Exception as e:
        print(f"❌ Failed to extract PDF: {e}")
        return []

    if not text.strip():
        return []

    # Try splitting by paragraphs first.
    chunks = [part.strip() for part in text.split("\n\n") if part.strip()]
    if chunks:
        return chunks

    # Fallback: group sentences five at a time.
    sentences = sent_tokenize(text)
    return [" ".join(sentences[start:start + 5]) for start in range(0, len(sentences), 5)]
39
-
40
def extract_paragraphs(path):
    """Dispatch to the extractor matching the file extension (.docx or .pdf).

    Raises ValueError for any other extension.
    """
    ext = os.path.splitext(path)[-1].lower()
    if ext == ".pdf":
        return extract_text_from_pdf(path)
    if ext == ".docx":
        return extract_text_from_docx(path)
    raise ValueError(f"Unsupported file type: {ext}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/detector/requirements.txt DELETED
@@ -1,14 +0,0 @@
1
- transformers
- torch
- scikit-learn
- pdfplumber
- python-docx
- nltk
- datasets
- fastapi
- uvicorn
- PyPDF2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/detector/train_model.py DELETED
@@ -1,79 +0,0 @@
1
import json
import torch
from datasets import Dataset
import evaluate
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments

# --- Data loading -----------------------------------------------------------
# The dataset is a JSON list of {"text": ..., "label": ...} records.
with open("ai_training_dataset.json", "r", encoding="utf-8") as f:
    records = json.load(f)

# Coerce labels to int so the datasets library infers an integer column.
for record in records:
    record["label"] = int(record["label"])

# 80/20 train/eval split.
splits = Dataset.from_list(records).train_test_split(test_size=0.2)
train_dataset = splits["train"]
eval_dataset = splits["test"]

# --- Tokenization -----------------------------------------------------------
model_name = "roberta-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)

def tokenize(example):
    """Tokenize a batch of examples to fixed-length 512-token inputs."""
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

train_dataset = train_dataset.map(tokenize, batched=True)
eval_dataset = eval_dataset.map(tokenize, batched=True)

# Expose only the tensors the model consumes.
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
eval_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# --- Model & metrics --------------------------------------------------------
# Three classes: 0 = human, 1 = AI, 2 = mixed.
model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=3)

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Accuracy over argmax predictions."""
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=1)
    return accuracy.compute(predictions=predictions, references=labels)

# --- Training ---------------------------------------------------------------
training_args = TrainingArguments(
    output_dir="./models/roberta-detector",
    evaluation_strategy="epoch",  # must match save_strategy for load_best_model_at_end
    save_strategy="epoch",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    logging_steps=10,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    report_to="none",  # disable WandB and other trackers
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

# Persist model + tokenizer side by side so inference can load both.
model.save_pretrained("./models/roberta-detector")
tokenizer.save_pretrained("./models/roberta-detector")

print("✅ Model trained and saved.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/detector/utils.py DELETED
@@ -1,21 +0,0 @@
1
- # detector/utils.py
2
-
3
- from PyPDF2 import PdfReader
4
- import docx
5
-
6
async def extract_text_from_file(file):
    """Extract plain text from an uploaded .pdf, .docx, or .txt file.

    *file* is an upload object exposing `.filename`, a file-like `.file`,
    and an async `.read()`. Raises ValueError for any other extension.
    """
    name = file.filename.lower()

    if name.endswith(".pdf"):
        reader = PdfReader(file.file)
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if name.endswith(".docx"):
        document = docx.Document(file.file)
        return "\n".join(p.text for p in document.paragraphs)

    if name.endswith(".txt"):
        raw = await file.read()
        return raw.decode("utf-8")

    raise ValueError("Unsupported file type.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/requirements.txt DELETED
@@ -1,101 +0,0 @@
1
- accelerate==1.8.1
2
- aiohappyeyeballs==2.6.1
3
- aiohttp==3.12.13
4
- aiosignal==1.3.2
5
- annotated-types==0.7.0
6
- anyio==4.9.0
7
- async-timeout==5.0.1
8
- attrs==25.3.0
9
- certifi==2025.6.15
10
- cffi==1.17.1
11
- charset-normalizer==3.4.2
12
- click==8.2.1
13
- colorama==0.4.6
14
- cryptography==45.0.4
15
- datasets==3.6.0
16
- defusedxml==0.7.1
17
- deprecation==2.1.0
18
- dill==0.3.8
19
- evaluate==0.4.4
20
- exceptiongroup==1.3.0
21
- fastapi==0.115.13
22
- filelock==3.18.0
23
- fonttools==4.58.4
24
- fpdf2==2.7.8
25
- frozenlist==1.7.0
26
- fsspec==2025.3.0
27
- gotrue==2.12.0
28
- greenlet==3.2.3
29
- h11==0.16.0
30
- h2==4.2.0
31
- hpack==4.1.0
32
- httpcore==1.0.9
33
- httpx==0.28.1
34
- huggingface-hub==0.33.0
35
- hyperframe==6.1.0
36
- idna==3.10
37
- iniconfig==2.1.0
38
- Jinja2==3.1.6
39
- joblib==1.5.1
40
- lxml==5.4.0
41
- MarkupSafe==3.0.2
42
- mpmath==1.3.0
43
- multidict==6.5.0
44
- multiprocess==0.70.16
45
- networkx==3.4.2
46
- nltk==3.8.1
47
- numpy==2.2.6
48
- packaging==25.0
49
- pandas==2.3.0
50
- pdfminer.six==20250506
51
- pdfplumber==0.11.7
52
- pillow==11.0.0
53
- pluggy==1.6.0
54
- postgrest==1.0.2
55
- propcache==0.3.2
56
- psutil==7.0.0
57
- pyarrow==20.0.0
58
- pycparser==2.22
59
- pydantic==2.11.7
60
- pydantic_core==2.33.2
61
- pyee==13.0.0
62
- Pygments==2.19.1
63
- PyJWT==2.10.1
64
- PyMuPDF==1.24.2
65
- PyPDF2==3.0.1
66
- pypdfium2==4.30.1
67
- pytest==8.4.1
68
- pytest-mock==3.14.1
69
- python-dateutil==2.9.0.post0
70
- python-docx==1.2.0
71
- python-dotenv==1.1.1
72
- python-multipart==0.0.20
73
- pytz==2025.2
74
- PyYAML==6.0.2
75
- realtime==2.4.3
76
- regex==2024.11.6
77
- reportlab==4.4.2
78
- requests==2.32.4
79
- safetensors==0.5.3
80
- scikit-learn==1.7.0
81
- scipy==1.15.3
82
- six==1.17.0
83
- sniffio==1.3.1
84
- starlette==0.46.2
85
- storage3==0.11.3
86
- StrEnum==0.4.15
87
- supabase==2.15.3
88
- sympy==1.13.1
89
- threadpoolctl==3.6.0
90
- tomli==2.2.1
91
- torch==2.5.1
92
- tqdm==4.67.1
93
- transformers==4.41.1
94
- typing-inspection==0.4.1
95
- typing_extensions==4.14.0
96
- tzdata==2025.2
97
- urllib3==2.5.0
98
- uvicorn==0.34.3
99
- websockets==14.2
100
- xxhash==3.5.0
101
- yarl==1.20.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/sample.docx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b36b8849df4a2d706e8aa2c2e9be106950f78cef26fd759ac7e2889dbe65e815
3
- size 108513