lingadevaruhp committed on
Commit
fc50fe8
·
verified ·
1 Parent(s): d3a239a

Update app.py to use new flirt_dataset.jsonl format

Browse files

Modified the code to load and process the JSONL dataset format instead of plain text. Added JSON parsing and context formatting for better AI responses. Updated the title and description to reflect JSONL usage.

Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -19,18 +19,35 @@ model = AutoModelForCausalLM.from_pretrained(
19
 
20
  # Load dataset for context
21
  def load_dataset():
22
- dataset_file = "2000-data-set.txt"
23
  if os.path.exists(dataset_file):
 
24
  with open(dataset_file, 'r', encoding='utf-8') as f:
25
- return f.read()
26
- return ""
 
 
 
 
 
 
27
 
28
  # Load the dataset content
29
  dataset_content = load_dataset()
30
 
31
  def generate_response(prompt, max_new_tokens=100):
32
  # Add dataset context to the prompt for better responses
33
- context = f"Dataset context: {dataset_content[:500]}...\n\n" if dataset_content else ""
 
 
 
 
 
 
 
 
 
 
34
 
35
  # Format the prompt for Phi-3
36
  formatted_prompt = f"<|user|>\n{context}{prompt}<|end|>\n<|assistant|>\n"
@@ -59,8 +76,8 @@ iface = gr.Interface(
59
  gr.Slider(minimum=10, maximum=200, value=100, label="Max New Tokens")
60
  ],
61
  outputs=gr.Textbox(label="AI Response"),
62
- title="Flirt-AI with Phi-3-Mini (Updated with New Dataset)",
63
- description="Chat with AI powered by Microsoft's Phi-3-mini model using the new 2000-data-set.txt dataset!"
64
  )
65
 
66
  if __name__ == "__main__":
 
19
 
20
  # Load dataset for context
21
  def load_dataset():
22
+ dataset_file = "flirt_dataset.jsonl"
23
  if os.path.exists(dataset_file):
24
+ dataset_entries = []
25
  with open(dataset_file, 'r', encoding='utf-8') as f:
26
+ for line in f:
27
+ try:
28
+ entry = json.loads(line.strip())
29
+ dataset_entries.append(entry)
30
+ except json.JSONDecodeError:
31
+ continue
32
+ return dataset_entries
33
+ return []
34
 
35
  # Load the dataset content
36
  dataset_content = load_dataset()
37
 
38
  def generate_response(prompt, max_new_tokens=100):
39
  # Add dataset context to the prompt for better responses
40
+ context = ""
41
+ if dataset_content:
42
+ # Use first few entries as context
43
+ context_entries = dataset_content[:3] # Use first 3 entries
44
+ context_text = ""
45
+ for entry in context_entries:
46
+ if 'input' in entry and 'output' in entry:
47
+ context_text += f"User: {entry['input']}\nAssistant: {entry['output']}\n\n"
48
+ elif 'text' in entry:
49
+ context_text += f"{entry['text']}\n\n"
50
+ context = f"Dataset context:\n{context_text}\n" if context_text else ""
51
 
52
  # Format the prompt for Phi-3
53
  formatted_prompt = f"<|user|>\n{context}{prompt}<|end|>\n<|assistant|>\n"
 
76
  gr.Slider(minimum=10, maximum=200, value=100, label="Max New Tokens")
77
  ],
78
  outputs=gr.Textbox(label="AI Response"),
79
+ title="Flirt-AI with Phi-3-Mini (Updated with JSONL Dataset)",
80
+ description="Chat with AI powered by Microsoft's Phi-3-mini model using the new flirt_dataset.jsonl dataset!"
81
  )
82
 
83
  if __name__ == "__main__":