Spaces:
Sleeping
Sleeping
Update app.py to use new flirt_dataset.jsonl format
Browse files. Modified the code to load and process the JSONL dataset format instead of plain text. Added proper JSON parsing and context formatting for better AI responses. Updated the title and description to reflect JSONL usage.
app.py
CHANGED
|
@@ -19,18 +19,35 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 19 |
|
| 20 |
# Load dataset for context
|
| 21 |
def load_dataset():
|
| 22 |
-
dataset_file = "
|
| 23 |
if os.path.exists(dataset_file):
|
|
|
|
| 24 |
with open(dataset_file, 'r', encoding='utf-8') as f:
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Load the dataset content
|
| 29 |
dataset_content = load_dataset()
|
| 30 |
|
| 31 |
def generate_response(prompt, max_new_tokens=100):
|
| 32 |
# Add dataset context to the prompt for better responses
|
| 33 |
-
context =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# Format the prompt for Phi-3
|
| 36 |
formatted_prompt = f"<|user|>\n{context}{prompt}<|end|>\n<|assistant|>\n"
|
|
@@ -59,8 +76,8 @@ iface = gr.Interface(
|
|
| 59 |
gr.Slider(minimum=10, maximum=200, value=100, label="Max New Tokens")
|
| 60 |
],
|
| 61 |
outputs=gr.Textbox(label="AI Response"),
|
| 62 |
-
title="Flirt-AI with Phi-3-Mini (Updated with
|
| 63 |
-
description="Chat with AI powered by Microsoft's Phi-3-mini model using the new
|
| 64 |
)
|
| 65 |
|
| 66 |
if __name__ == "__main__":
|
|
|
|
| 19 |
|
| 20 |
# Load dataset for context
def load_dataset(dataset_file="flirt_dataset.jsonl"):
    """Load a JSONL dataset into a list of parsed entries.

    Args:
        dataset_file: Path to the JSON-Lines file. Defaults to the
            repo-local "flirt_dataset.jsonl" so existing callers are
            unchanged.

    Returns:
        list: One parsed JSON object per valid line. Blank lines and
        lines that are not valid JSON are skipped. Returns [] when the
        file does not exist.
    """
    if not os.path.exists(dataset_file):
        return []
    dataset_entries = []
    with open(dataset_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Skip blank lines explicitly instead of relying on a
                # JSONDecodeError for ordinary control flow.
                continue
            try:
                dataset_entries.append(json.loads(line))
            except json.JSONDecodeError:
                # Tolerate malformed lines: a partial dataset is still
                # useful as prompt context, so best-effort is intended.
                continue
    return dataset_entries
|
| 34 |
|
| 35 |
# Load the dataset content
# Parsed once at import time and reused by generate_response for prompt
# context; empty list when flirt_dataset.jsonl is absent.
dataset_content = load_dataset()
|
| 37 |
|
| 38 |
def generate_response(prompt, max_new_tokens=100):
|
| 39 |
# Add dataset context to the prompt for better responses
|
| 40 |
+
context = ""
|
| 41 |
+
if dataset_content:
|
| 42 |
+
# Use first few entries as context
|
| 43 |
+
context_entries = dataset_content[:3] # Use first 3 entries
|
| 44 |
+
context_text = ""
|
| 45 |
+
for entry in context_entries:
|
| 46 |
+
if 'input' in entry and 'output' in entry:
|
| 47 |
+
context_text += f"User: {entry['input']}\nAssistant: {entry['output']}\n\n"
|
| 48 |
+
elif 'text' in entry:
|
| 49 |
+
context_text += f"{entry['text']}\n\n"
|
| 50 |
+
context = f"Dataset context:\n{context_text}\n" if context_text else ""
|
| 51 |
|
| 52 |
# Format the prompt for Phi-3
|
| 53 |
formatted_prompt = f"<|user|>\n{context}{prompt}<|end|>\n<|assistant|>\n"
|
|
|
|
| 76 |
gr.Slider(minimum=10, maximum=200, value=100, label="Max New Tokens")
|
| 77 |
],
|
| 78 |
outputs=gr.Textbox(label="AI Response"),
|
| 79 |
+
title="Flirt-AI with Phi-3-Mini (Updated with JSONL Dataset)",
|
| 80 |
+
description="Chat with AI powered by Microsoft's Phi-3-mini model using the new flirt_dataset.jsonl dataset!"
|
| 81 |
)
|
| 82 |
|
| 83 |
if __name__ == "__main__":
|