Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
import os
|
| 2 |
import json
|
| 3 |
import time
|
| 4 |
from typing import Dict, List, Tuple
|
|
@@ -15,9 +14,6 @@ from langchain_community.chains.question_answering import load_qa_chain
|
|
| 15 |
from langchain_community.utils import CharacterTextSplitter
|
| 16 |
from transformers import BertTokenizerFast
|
| 17 |
|
| 18 |
-
# Download the DistilBERT tokenizer (~3 MB)
|
| 19 |
-
DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased').save_pretrained('./cache/distilbert-base-uncased-local')
|
| 20 |
-
|
| 21 |
# --- Constants ---
|
| 22 |
MODEL_NAME = "google/flan-t5-xl" # Consider a more powerful model such as 'google/flan-t5-xxl' if resources allow
|
| 23 |
MAX_NEW_TOKENS = 2048 # Increased for better code generation
|
|
@@ -34,6 +30,16 @@ def load_model_and_tokenizer():
|
|
| 34 |
|
| 35 |
model, tokenizer = load_model_and_tokenizer()
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
# --- Agents ---
|
| 38 |
agents = {
|
| 39 |
"WEB_DEV": {
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
import time
|
| 3 |
from typing import Dict, List, Tuple
|
|
|
|
| 14 |
from langchain_community.utils import CharacterTextSplitter
|
| 15 |
from transformers import BertTokenizerFast
|
| 16 |
|
|
|
|
|
|
|
|
|
|
| 17 |
# --- Constants ---
|
| 18 |
MODEL_NAME = "google/flan-t5-xl" # Consider a more powerful model such as 'google/flan-t5-xxl' if resources allow
|
| 19 |
MAX_NEW_TOKENS = 2048 # Increased for better code generation
|
|
|
|
| 30 |
|
| 31 |
model, tokenizer = load_model_and_tokenizer()
|
| 32 |
|
| 33 |
+
PRETRAINED_MODEL_NAME = "distilbert-base-uncased"
|
| 34 |
+
model_path = os.path.join(os.getcwd(), PRETRAINED_MODEL_NAME)
|
| 35 |
+
if not os.path.exists(model_path):
|
| 36 |
+
raise FileNotFoundError("Pre-trained model weight directory {} doesn't exist".format(model_path))
|
| 37 |
+
else:
|
| 38 |
+
print("Found Pre-trained Model at:", model_path)
|
| 39 |
+
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
|
| 40 |
+
# Download the DistilBERT tokenizer (~3 MB)
|
| 41 |
+
DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased').save_pretrained('./cache/distilbert-base-uncased-local')
|
| 42 |
+
|
| 43 |
# --- Agents ---
|
| 44 |
agents = {
|
| 45 |
"WEB_DEV": {
|