Spaces:

lingadevaruhp
/

thoshan_Flash_mini

Sleeping

App Files Community

lingadevaruhp committed on Sep 14, 2025

Commit

fc50fe8

verified ·

1 Parent(s): d3a239a

Update app.py to use new flirt_dataset.jsonl format

Browse files

Modified the code to load and process the dataset in JSONL format instead of plain text. Added proper JSON parsing and context formatting for better AI responses. Updated the title and description to reflect JSONL usage.

Files changed (1) hide show

app.py +23 -6

app.py CHANGED Viewed

@@ -19,18 +19,35 @@ model = AutoModelForCausalLM.from_pretrained(
 # Load dataset for context
 def load_dataset():
-    dataset_file = "2000-data-set.txt"
     if os.path.exists(dataset_file):
         with open(dataset_file, 'r', encoding='utf-8') as f:
-            return f.read()
-    return ""
 # Load the dataset content
 dataset_content = load_dataset()
 def generate_response(prompt, max_new_tokens=100):
     # Add dataset context to the prompt for better responses
-    context = f"Dataset context: {dataset_content[:500]}...\n\n" if dataset_content else ""
     # Format the prompt for Phi-3
     formatted_prompt = f"<|user|>\n{context}{prompt}<|end|>\n<|assistant|>\n"
@@ -59,8 +76,8 @@ iface = gr.Interface(
         gr.Slider(minimum=10, maximum=200, value=100, label="Max New Tokens")
     ],
     outputs=gr.Textbox(label="AI Response"),
-    title="Flirt-AI with Phi-3-Mini (Updated with New Dataset)",
-    description="Chat with AI powered by Microsoft's Phi-3-mini model using the new 2000-data-set.txt dataset!"
 )
 if __name__ == "__main__":

 # Load dataset for context
 def load_dataset():
+    dataset_file = "flirt_dataset.jsonl"
     if os.path.exists(dataset_file):
+        dataset_entries = []
         with open(dataset_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                try:
+                    entry = json.loads(line.strip())
+                    dataset_entries.append(entry)
+                except json.JSONDecodeError:
+                    continue
+        return dataset_entries
+    return []
 # Load the dataset content
 dataset_content = load_dataset()
 def generate_response(prompt, max_new_tokens=100):
     # Add dataset context to the prompt for better responses
+    context = ""
+    if dataset_content:
+        # Use first few entries as context
+        context_entries = dataset_content[:3]  # Use first 3 entries
+        context_text = ""
+        for entry in context_entries:
+            if 'input' in entry and 'output' in entry:
+                context_text += f"User: {entry['input']}\nAssistant: {entry['output']}\n\n"
+            elif 'text' in entry:
+                context_text += f"{entry['text']}\n\n"
+        context = f"Dataset context:\n{context_text}\n" if context_text else ""
     # Format the prompt for Phi-3
     formatted_prompt = f"<|user|>\n{context}{prompt}<|end|>\n<|assistant|>\n"
         gr.Slider(minimum=10, maximum=200, value=100, label="Max New Tokens")
     ],
     outputs=gr.Textbox(label="AI Response"),
+    title="Flirt-AI with Phi-3-Mini (Updated with JSONL Dataset)",
+    description="Chat with AI powered by Microsoft's Phi-3-mini model using the new flirt_dataset.jsonl dataset!"
 )
 if __name__ == "__main__":