File size: 37,473 Bytes
3796cdb
8b06363
1046013
 
3796cdb
 
 
 
 
1046013
 
 
8b06363
 
1046013
8b06363
 
1046013
 
5baa77a
 
8b06363
 
 
 
 
 
 
 
 
 
1046013
8b06363
e30b15f
 
 
 
 
 
 
 
 
 
 
8b06363
e30b15f
 
8b06363
 
 
 
 
1046013
8b06363
 
 
1046013
 
 
5baa77a
1046013
8b06363
3796cdb
8b06363
5baa77a
 
 
 
 
3796cdb
1046013
 
4430bc2
3796cdb
5baa77a
 
 
 
 
 
1046013
3796cdb
 
8b06363
1046013
3796cdb
 
 
 
 
 
5baa77a
 
1046013
 
3796cdb
 
8b06363
1046013
8b06363
 
3796cdb
 
1046013
 
3796cdb
8b06363
 
3796cdb
1046013
8b06363
5baa77a
 
 
 
 
 
 
1046013
 
8b06363
 
 
 
e30b15f
 
 
 
1046013
 
8b06363
 
1046013
 
 
 
 
8b06363
 
1046013
 
 
 
 
 
e30b15f
 
 
 
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e30b15f
 
 
 
8b06363
1046013
8b06363
1046013
 
8b06363
1046013
 
 
 
 
8b06363
 
1046013
8b06363
 
1046013
8b06363
 
 
 
 
 
1046013
 
8b06363
 
 
1046013
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
e30b15f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b06363
 
 
1046013
 
8b06363
e30b15f
1046013
 
 
8b06363
 
 
 
 
 
 
1046013
5baa77a
 
 
 
 
 
 
 
 
1046013
 
8b06363
1046013
8b06363
 
1046013
 
8b06363
 
1046013
8b06363
 
 
1046013
 
 
 
 
 
 
 
 
 
 
 
8b06363
 
 
 
1046013
 
 
 
 
 
 
 
 
 
 
8b06363
 
 
 
 
1046013
 
 
 
 
 
 
 
 
 
 
8b06363
 
 
1046013
 
 
 
 
 
 
 
 
 
 
 
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1046013
 
8b06363
1046013
e30b15f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b06363
 
 
 
 
1046013
8b06363
 
 
 
 
 
 
 
1046013
8b06363
 
 
1046013
8b06363
 
1046013
8b06363
 
 
 
 
 
 
 
1046013
8b06363
 
 
 
1046013
8b06363
 
 
 
 
 
1046013
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231201c
5baa77a
 
 
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
1046013
8b06363
 
 
 
 
 
 
 
e30b15f
8b06363
1046013
8b06363
 
e30b15f
1046013
3796cdb
 
 
8b06363
 
 
e30b15f
8b06363
 
 
 
 
 
 
1046013
8b06363
 
 
 
 
 
 
 
1046013
3796cdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1046013
8b06363
5baa77a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b06363
5baa77a
3796cdb
 
 
 
8b06363
 
3796cdb
 
 
 
 
 
 
 
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e30b15f
8b06363
 
 
 
e30b15f
1046013
8b06363
e30b15f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3796cdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e30b15f
 
 
 
 
 
 
 
 
 
1046013
8b06363
3796cdb
 
 
 
 
 
 
8b06363
3796cdb
 
e30b15f
8b06363
 
1046013
8b06363
3796cdb
 
 
8b06363
 
1046013
8b06363
 
 
 
 
 
3796cdb
 
 
 
 
 
 
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1046013
e30b15f
 
 
 
 
 
8b06363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5baa77a
 
 
 
 
 
 
e30b15f
 
8b06363
 
 
 
 
1046013
8b06363
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
# Required imports - ensure all dependencies are installed
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
try:
    import bitsandbytes as bnb
except ImportError:
    print("WARNING: bitsandbytes not installed. Required for 4-bit quantization.")

import time
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import re
from threading import Thread
import numpy as np
from io import StringIO

HF_TOKEN = None  # HuggingFace access token; populated by set_hf_token() before model loading

# Global variables to store model, tokenizer and pipe
MODEL = None            # transformers model instance, set by load_model_in_background()
TOKENIZER = None        # matching tokenizer, set alongside MODEL
PIPE = None             # text-generation pipeline built from MODEL + TOKENIZER
MODEL_LOADING = False   # True while the background loading thread is running
MODEL_LOADED = False    # True once MODEL/TOKENIZER/PIPE are ready for use

# Store chat history for different chat sessions
# Maps chat name -> list of {"role": ..., "content": ...} message dicts.
CHATS = {"Main Chat": []}
CURRENT_CHAT = "Main Chat"  # key into CHATS for the active session

# System prompt and generation config
SYSTEM_PROMPT = """You are a helpful AI assistant based on the Mistral-7B-Instruct model. 
You specialize in creating structured JSON data for automation workflows like n8n.
When asked to create JSON for n8n workflows:
1. Structure the data in valid JSON format with proper nesting
2. Include all necessary fields and properties for nodes
3. Format with correct indentation and structure
4. Use proper n8n node syntax and follow their data structure requirements
5. Always validate the JSON before returning it

For JSON workflow nodes, be attentive to detail and include all necessary fields."""

# Sampling parameters forwarded to the pipeline on every generation;
# mutable at runtime via update_generation_params().
GENERATE_CONFIG = {
    "max_new_tokens": 1024,  # Increased for complex JSON responses
    "temperature": 0.5,      # Slightly lower for more precise JSON
    "top_p": 0.95,
    "top_k": 50,
    "repetition_penalty": 1.1,
    "do_sample": True
}

# File data storage
FILE_DATA = None      # raw uploaded payload (DataFrame, str, or parsed JSON)
ANALYZED_DATA = None  # {"type", "data", "summary"} dict set by analyze_uploaded_file()

# Function to load the model in background
def load_model_in_background():
    """Load the Mistral-7B tokenizer, 4-bit model, and pipeline into module globals.

    Intended to run as a background ``Thread`` target (see ``load_model_button``).
    Because a thread target's return value is silently discarded, the final
    status message is also printed to stdout so failures are actually visible.

    Returns:
        A human-readable status or error message string.
    """
    global MODEL, TOKENIZER, PIPE, MODEL_LOADING, MODEL_LOADED, HF_TOKEN

    def _finish(message):
        # Surface the outcome on stdout: when running inside a Thread,
        # nothing ever reads the return value, so printing is the only
        # way the operator sees success/failure details.
        print(message)
        return message

    try:
        MODEL_LOADING = True
        print("Starting model loading process...")

        # Check if token is provided
        if not HF_TOKEN:
            MODEL_LOADING = False
            return _finish("Error: HuggingFace token is required. Please enter your token and try again.")

        # Model identifier - using quantized 4-bit version for reduced memory
        model_id = "mistralai/Mistral-7B-Instruct-v0.3"

        print("Loading tokenizer...")
        # Set tokenizer to use legacy format to avoid issues
        # NOTE(review): `legacy_format` is not a documented AutoTokenizer
        # kwarg (the slow-tokenizer flag is `legacy`) -- confirm intent.
        TOKENIZER = AutoTokenizer.from_pretrained(
            model_id,
            legacy_format=True,
            token=HF_TOKEN  # authenticate against the gated repo
        )

        print("Loading model with optimized settings for limited memory...")
        # Configure model loading with 4-bit quantization for minimum memory usage.
        # NOTE(review): recent transformers versions deprecate `load_in_4bit=True`
        # in favor of quantization_config=BitsAndBytesConfig(...) -- verify the
        # installed version still accepts it.
        MODEL = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,  # Use half precision
            device_map="auto",  # Automatically distribute across available GPUs/CPU
            low_cpu_mem_usage=True,
            load_in_4bit=True,  # Enable 4-bit quantization
            max_memory={0: "8GiB"},  # Limit memory usage per GPU
            offload_folder="offload_folder",  # Use disk offloading if needed
            offload_state_dict=True,  # Offload state dict to CPU when possible
            token=HF_TOKEN
        )

        print("Creating optimized pipeline...")
        # Create text generation pipeline with more efficient settings
        PIPE = pipeline(
            "text-generation",
            model=MODEL,
            tokenizer=TOKENIZER,
            return_full_text=False,
            batch_size=1  # Process one batch at a time to reduce memory
        )

        print("Model loading complete!")
        MODEL_LOADING = False
        MODEL_LOADED = True
        return _finish("Model loaded successfully! Ready to generate responses.")
    except Exception as e:
        MODEL_LOADING = False
        error_msg = str(e)
        # Map common HuggingFace Hub failure modes to actionable messages.
        if "401" in error_msg or "authentication" in error_msg.lower():
            return _finish(f"Authentication error: Please check your HuggingFace token. Error: {error_msg}")
        elif "access" in error_msg.lower() or "gated" in error_msg.lower():
            return _finish(f"Access denied: You may need to request access to this model on HuggingFace. Error: {error_msg}")
        else:
            return _finish(f"Error loading model: {error_msg}")

# Function to generate response using the model
def generate_response(prompt, chat_history, progress=gr.Progress()):
    """Generate a model reply for *prompt* and append the turn to both histories.

    Args:
        prompt: The user's message text.
        chat_history: Gradio Chatbot history as a list of (user, bot) tuples.
        progress: Gradio progress reporter (injected by Gradio).

    Returns:
        The updated list of (user, bot) tuples for the Chatbot component.
    """
    global MODEL, TOKENIZER, PIPE, CHATS, CURRENT_CHAT, SYSTEM_PROMPT, GENERATE_CONFIG, FILE_DATA, ANALYZED_DATA

    if not MODEL_LOADED:
        # Fix: echo the actual prompt (not the literal "Your message") so the
        # user's text is preserved in the visible history.
        if MODEL_LOADING:
            return chat_history + [(prompt, "Model is still loading. Please wait a moment before sending messages.")]
        else:
            return chat_history + [(prompt, "Model not loaded. Please click 'Load Mistral-7B Model' first.")]

    try:
        # Use the current chat's history
        messages = CHATS[CURRENT_CHAT]

        # Format conversation history in Mistral's chat format
        conversation = []

        # Add system prompt if it exists
        if SYSTEM_PROMPT:
            conversation.append({"role": "system", "content": SYSTEM_PROMPT})

        # Add previous messages
        for msg in messages:
            if msg["role"] != "system":  # Skip system messages in the history
                conversation.append({"role": msg["role"], "content": msg["content"]})

        # Check if JSON formatting is specifically requested
        is_json_request = any(keyword in prompt.lower()
                            for keyword in ["json", "n8n", "workflow", "automation", "format"])

        # Handle file-related queries by including context
        if ANALYZED_DATA is not None and any(keyword in prompt.lower()
                                             for keyword in ["file", "data", "analyze", "show", "tell me about", "json"]):
            file_context = ""
            if ANALYZED_DATA["type"] in ["csv", "excel"]:
                # For structured data, provide summary info
                summary = ANALYZED_DATA["summary"]
                file_context = f"""I've analyzed the uploaded {ANALYZED_DATA["type"]} file with {summary["rows"]} rows and {summary["columns"]} columns.
                The columns are: {', '.join(summary["column_names"])}.
                Here's a sample of the data (first 5 rows): {json.dumps(summary["sample"])}
                """
            elif ANALYZED_DATA["type"] == "text":
                # For text data, provide the content if not too large
                summary = ANALYZED_DATA["summary"]
                file_context = f"""I've analyzed the uploaded text file with {summary["word_count"]} words and {summary["line_count"]} lines.
                Here's a preview of the content: {summary["preview"]}
                """
            elif ANALYZED_DATA["type"] == "json":
                # For JSON data
                summary = ANALYZED_DATA["summary"]
                file_context = f"""I've analyzed the uploaded JSON file which contains a {summary["type"]}.
                {"Keys: " + ', '.join(summary["keys"]) if summary["keys"] else ""}
                {"Items: " + str(summary["length"]) if summary["length"] else ""}
                Here's a preview: {summary["preview"]}
                """

            # Enhance the user's query with file context
            enhanced_prompt = f"{prompt}\n\nContext about the file: {file_context}"
        else:
            enhanced_prompt = prompt

        # If this is a JSON request, add special instructions
        if is_json_request:
            enhanced_prompt += "\n\nPlease generate a valid, properly formatted JSON response suitable for n8n workflows. Include all necessary fields and ensure correct formatting. The JSON should be valid and ready to be imported directly into n8n."

        # Add current prompt
        conversation.append({"role": "user", "content": enhanced_prompt})

        # Convert to Mistral's chat format
        formatted_prompt = TOKENIZER.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True
        )

        # Generate response with progress reporting
        progress(0, desc="Generating response...")

        # Generate response
        response = PIPE(
            formatted_prompt,
            max_new_tokens=GENERATE_CONFIG["max_new_tokens"],
            temperature=GENERATE_CONFIG["temperature"],
            top_p=GENERATE_CONFIG["top_p"],
            top_k=GENERATE_CONFIG["top_k"],
            repetition_penalty=GENERATE_CONFIG["repetition_penalty"],
            do_sample=GENERATE_CONFIG["do_sample"]
        )

        progress(1, desc="Response generated!")

        # Extract generated text
        generated_text = response[0]["generated_text"]

        # Validate and pretty-print JSON *before* storing the turn, so the
        # persisted history (CHATS) and the displayed history agree.
        if is_json_request and "```json" in generated_text:
            try:
                # Try to extract JSON from code blocks
                json_match = re.search(r'```json\s*([\s\S]*?)\s*```', generated_text)
                if json_match:
                    json_string = json_match.group(1)
                    # Parse and re-stringify to ensure proper formatting
                    parsed_json = json.loads(json_string)
                    formatted_json = json.dumps(parsed_json, indent=2)

                    # Replace the original JSON with the properly formatted one
                    generated_text = generated_text.replace(json_match.group(0), f"```json\n{formatted_json}\n```")
            except json.JSONDecodeError:
                # If JSON parsing fails, we keep the original response
                pass

        # Add user message to chat history
        CHATS[CURRENT_CHAT].append({
            "role": "user",
            "content": prompt
        })

        # Add assistant response to chat history
        CHATS[CURRENT_CHAT].append({
            "role": "assistant",
            "content": generated_text
        })

        # Update the chat history for the Gradio component (tuple format)
        chat_history.append((prompt, generated_text))

        return chat_history

    except Exception as e:
        error_message = f"Error generating response: {str(e)}"
        return chat_history + [(prompt, error_message)]

# Function to create a new chat
def create_new_chat(chat_name):
    """Create and switch to a new, empty chat session named *chat_name*."""
    global CHATS, CURRENT_CHAT

    # Reject empty names and duplicates up front.
    if not chat_name or chat_name in CHATS:
        return "Please enter a unique chat name"

    CHATS[chat_name] = []
    CURRENT_CHAT = chat_name
    return f"Created new chat: {chat_name}"

# MODIFICATION 3: Add function to set HuggingFace token
def set_hf_token(token):
    """Store a trimmed HuggingFace token in the HF_TOKEN global."""
    global HF_TOKEN

    cleaned = token.strip() if token else ""
    if not cleaned:
        return "Please enter a valid HuggingFace token."

    HF_TOKEN = cleaned
    return "HuggingFace token saved successfully!"

# Function to handle file upload and analysis
def _summarize_dataframe(data):
    """Build the standard summary dict for a tabular (CSV/Excel) DataFrame."""
    return {
        "rows": len(data),
        "columns": len(data.columns),
        "column_names": list(data.columns),
        "data_types": {col: str(dtype) for col, dtype in data.dtypes.items()},
        "sample": data.head(5).to_dict(orient='records')
    }


def analyze_uploaded_file(file):
    """Analyze an uploaded file and record a summary in the active chat.

    Supports CSV, Excel (xls/xlsx), plain text (txt/md) and JSON files.
    Side effects: sets the FILE_DATA / ANALYZED_DATA globals and appends a
    "system" message describing the file to CHATS[CURRENT_CHAT].

    Args:
        file: A Gradio file object exposing a ``.name`` path, or None.

    Returns:
        A status string (success message or error description).
    """
    global FILE_DATA, ANALYZED_DATA, CHATS, CURRENT_CHAT

    if file is None:
        return "No file uploaded."

    try:
        file_extension = file.name.split('.')[-1].lower()

        if file_extension == 'csv':
            data = pd.read_csv(file.name)
            FILE_DATA = data
            # Fix: csv and excel previously duplicated this summary block verbatim.
            ANALYZED_DATA = {"type": "csv", "data": data, "summary": _summarize_dataframe(data)}

        elif file_extension in ['txt', 'md']:
            with open(file.name, 'r', encoding='utf-8') as f:
                content = f.read()
            FILE_DATA = content
            ANALYZED_DATA = {
                "type": "text",
                "data": content,
                "summary": {
                    "length": len(content),
                    "word_count": len(content.split()),
                    "line_count": len(content.splitlines()),
                    "preview": content[:500] + ("..." if len(content) > 500 else "")
                }
            }

        elif file_extension == 'json':
            with open(file.name, 'r', encoding='utf-8') as f:
                data = json.load(f)
            FILE_DATA = data
            ANALYZED_DATA = {
                "type": "json",
                "data": data,
                "summary": {
                    "type": "object" if isinstance(data, dict) else "array",
                    "keys": list(data.keys()) if isinstance(data, dict) else None,
                    "length": len(data) if isinstance(data, list) else None,
                    "preview": str(data)[:500] + ("..." if len(str(data)) > 500 else "")
                }
            }

        elif file_extension in ['xls', 'xlsx']:
            data = pd.read_excel(file.name)
            FILE_DATA = data
            ANALYZED_DATA = {"type": "excel", "data": data, "summary": _summarize_dataframe(data)}

        else:
            return f"File type .{file_extension} is not supported."

        # Build a human-readable summary for the chat so follow-up
        # questions can reference the file's contents.
        file_summary = f"File analyzed: {file.name}\n"
        if ANALYZED_DATA['type'] == 'csv' or ANALYZED_DATA['type'] == 'excel':
            file_summary += f"- {ANALYZED_DATA['summary']['rows']} rows, {ANALYZED_DATA['summary']['columns']} columns\n"
            file_summary += f"- Columns: {', '.join(ANALYZED_DATA['summary']['column_names'])}"
        elif ANALYZED_DATA['type'] == 'text':
            file_summary += f"- {ANALYZED_DATA['summary']['word_count']} words, {ANALYZED_DATA['summary']['line_count']} lines"
        elif ANALYZED_DATA['type'] == 'json':
            file_summary += f"- Type: {ANALYZED_DATA['summary']['type']}"
            if ANALYZED_DATA['summary']['keys']:
                file_summary += f"\n- Keys: {', '.join(ANALYZED_DATA['summary']['keys'])}"

        # Add system message to current chat
        CHATS[CURRENT_CHAT].append({
            "role": "system",
            "content": file_summary
        })

        return f"Successfully analyzed {ANALYZED_DATA['type']} file: {file.name}"

    except Exception as e:
        return f"Error analyzing file: {str(e)}"

# Function to convert data to n8n JSON format
def _n8n_code_workflow(name, js_code):
    """Build the shared n8n workflow skeleton around a single Code node.

    Args:
        name: Workflow display name.
        js_code: JavaScript source for the Code node's ``jsCode`` parameter.

    Returns:
        A dict matching n8n's importable workflow schema.
    """
    return {
        "name": name,
        "nodes": [
            {
                "parameters": {
                    "jsCode": js_code
                },
                "id": "1",
                "name": "Code",
                "type": "n8n-nodes-base.code",
                "typeVersion": 1,
                "position": [
                    250,
                    300
                ]
            }
        ],
        "connections": {},
        "active": False,
        "settings": {},
        "version": 1,
        "meta": {
            "instanceId": "GENERATED"
        }
    }


def convert_to_n8n_json():
    """Convert the most recently analyzed file into an importable n8n workflow.

    Fix: the three branches previously copy-pasted the entire workflow
    template; they now share _n8n_code_workflow and differ only in the
    workflow name and the generated jsCode payload.

    Returns:
        A pretty-printed JSON string, or an error/status message.
    """
    global ANALYZED_DATA

    if ANALYZED_DATA is None:
        return "No file has been analyzed yet. Please upload a file first."

    try:
        if ANALYZED_DATA['type'] in ['csv', 'excel']:
            # Convert DataFrame rows to n8n compatible JSON records.
            records = ANALYZED_DATA['data'].to_dict(orient='records')
            n8n_json = _n8n_code_workflow(
                "Generated Data Workflow",
                f"return {json.dumps(records, indent=2)};"
            )
        elif ANALYZED_DATA['type'] == 'json':
            # Data is already JSON; just embed it in the n8n structure.
            n8n_json = _n8n_code_workflow(
                "JSON Workflow",
                f"return {json.dumps(ANALYZED_DATA['data'], indent=2)};"
            )
        elif ANALYZED_DATA['type'] == 'text':
            # Wrap raw text in a simple { text: ... } payload.
            n8n_json = _n8n_code_workflow(
                "Text Processing Workflow",
                f"return {{ text: {json.dumps(ANALYZED_DATA['data'])} }};"
            )
        else:
            return "Cannot convert this file type to n8n JSON format."

        return json.dumps(n8n_json, indent=2)

    except Exception as e:
        return f"Error generating n8n JSON: {str(e)}"

# Function to update system prompt
def update_system_prompt(new_prompt):
    """Replace the global system prompt used on every generation."""
    global SYSTEM_PROMPT

    SYSTEM_PROMPT = new_prompt
    return "System prompt updated!"

# Function to update generation parameters
def update_generation_params(temp, max_tokens, top_p, rep_penalty):
    """Overwrite the tunable sampling settings in GENERATE_CONFIG."""
    global GENERATE_CONFIG

    GENERATE_CONFIG.update({
        "temperature": temp,
        "max_new_tokens": max_tokens,
        "top_p": top_p,
        "repetition_penalty": rep_penalty,
    })
    return "Generation parameters updated!"

# Function to display file data information
def display_file_info():
    """Render a markdown report describing the most recently analyzed file."""
    global ANALYZED_DATA

    if ANALYZED_DATA is None:
        return "No file has been analyzed yet."

    kind = ANALYZED_DATA['type']
    summary = ANALYZED_DATA['summary']

    # Assemble the report as parts and join once at the end.
    parts = [
        "## File Analysis Results\n\n",
        f"**File Type:** {kind}\n\n",
    ]

    if kind in ('csv', 'excel'):
        parts.append(f"**Rows:** {summary['rows']}\n")
        parts.append(f"**Columns:** {summary['columns']}\n")
        parts.append(f"**Column Names:** {', '.join(summary['column_names'])}\n\n")
        # Render the stored sample records back into a markdown table.
        parts.append("**Sample Data (First 5 rows):**\n")
        parts.append(pd.DataFrame(summary['sample']).to_markdown())
    elif kind == 'text':
        parts.append(f"**Length:** {summary['length']} characters\n")
        parts.append(f"**Word Count:** {summary['word_count']}\n")
        parts.append(f"**Line Count:** {summary['line_count']}\n\n")
        parts.append("**Preview:**\n```\n" + summary['preview'] + "\n```")
    elif kind == 'json':
        parts.append(f"**Type:** {summary['type']}\n")
        if summary['keys']:
            parts.append(f"**Keys:** {', '.join(summary['keys'])}\n")
        if summary['length'] is not None:
            parts.append(f"**Length:** {summary['length']} items\n")
        parts.append("\n**Preview:**\n```json\n" + summary['preview'] + "\n```")

    return "".join(parts)

# Function to select current chat
def select_chat(chat_name):
    """Make *chat_name* the active chat session."""
    global CURRENT_CHAT

    CURRENT_CHAT = chat_name
    return f"Switched to chat: {chat_name}"

# Function to clear current chat
def clear_current_chat():
    """Reset the active chat session to an empty message list."""
    global CHATS, CURRENT_CHAT

    active = CURRENT_CHAT
    CHATS[active] = []
    return f"Cleared chat: {active}"

# Function to load model and return status
def load_model_button():
    """Kick off background model loading, reporting the current state first."""
    global HF_TOKEN

    # Guard clauses: token missing, already done, or already in progress.
    if not HF_TOKEN:
        return "Please enter your HuggingFace token first before loading the model."
    if MODEL_LOADED:
        return "Model is already loaded and ready!"
    if MODEL_LOADING:
        return "Model is currently loading... Please wait."

    # Run loading off the UI thread so the interface stays responsive.
    loader = Thread(target=load_model_in_background)
    loader.start()
    return "Started loading the model. This may take a few minutes..."

# Function to get available chats
def get_available_chats():
    """Return the names of all chat sessions, in insertion order."""
    global CHATS

    return [name for name in CHATS]

# Main Gradio app
def create_gradio_interface():
    """Build and return the full Gradio Blocks application.

    Lays out three tabs — Chat, File Analysis & JSON Conversion, and a
    static n8n JSON formatting guide — and wires every widget event to the
    module-level handler functions defined elsewhere in this file.
    """
    # CSS for better styling
    css = """
    .gradio-container {max-width: 100% !important; padding: 0}
    .chat-message-user {background-color: #e0f7fa; padding: 12px; border-radius: 8px; margin-bottom: 8px}
    .chat-message-bot {background-color: #f1f8e9; padding: 12px; border-radius: 8px; margin-bottom: 8px}
    .file-info {border: 1px solid #ddd; padding: 15px; border-radius: 5px; margin-top: 10px}
    .json-output {font-family: monospace; white-space: pre; overflow-x: auto; background-color: #f5f5f5; padding: 15px; border-radius: 5px;}
    """
    
    # Setup tabs for different functionalities
    with gr.Blocks(css=css) as app:
        gr.Markdown("# 🤖 Advanced Mistral-7B-Instruct Chatbot for n8n JSON Generation")
        
        # Add fallback mode when model loading fails
        # Per-session boolean: True means answer with canned templates
        # instead of the Mistral model.
        fallback_mode = gr.State(False)
        
        with gr.Tab("Chat"):
            with gr.Row():
                with gr.Column(scale=3):
                    # Initialize with empty list to fix the tuple format error
                    chatbot = gr.Chatbot(
                        [],
                        elem_id="chatbot",
                        height=500,
                        bubble_full_width=False,
                        avatar_images=(None, None),
                    )
                    
                    with gr.Row():
                        msg = gr.Textbox(
                            placeholder="Type your message here...",
                            container=False,
                            scale=8,
                            autofocus=True
                        )
                        send_btn = gr.Button("Send", scale=1, variant="primary")
                    
                    # Fallback response mode when model fails to load
                    def simple_fallback_response(message, history):
                        """Return *history* plus one canned (message, response) pair.

                        Keyword-matches the user message to pick a template;
                        used only when the model is not loaded.
                        """
                        # Sample n8n JSON structure for common requests
                        if any(keyword in message.lower() for keyword in ['json', 'n8n', 'workflow']):
                            response = """Here's a basic n8n workflow JSON structure you can use:

```json
{
  "name": "Simple Workflow",
  "nodes": [
    {
      "parameters": {
        "values": {
          "string": [
            {
              "name": "data",
              "value": "Your data here"
            }
          ]
        }
      },
      "id": "1",
      "name": "Set",
      "type": "n8n-nodes-base.set",
      "typeVersion": 1,
      "position": [250, 300]
    }
  ],
  "connections": {},
  "active": false,
  "settings": {},
  "version": 1
}
```

You can customize this template with your specific data. If you need a more complex structure or specific node types, please let me know."""
                        elif 'file' in message.lower() or 'data' in message.lower():
                            response = "To analyze files or data, please upload your file in the 'File Analysis & JSON Conversion' tab. I'll be able to help you convert it to n8n compatible JSON format."
                        else:
                            response = "I'm currently operating in fallback mode because the Mistral-7B model couldn't be loaded. I can still help with basic n8n JSON structures. Try asking for a specific workflow type or JSON structure you need for n8n."
                        
                        # Chatbot expects a list of (user, bot) tuples.
                        return history + [(message, response)]
                    
                    with gr.Row():
                        chat_selector = gr.Dropdown(
                            choices=get_available_chats(),
                            value=CURRENT_CHAT,
                            label="Select Chat",
                            interactive=True
                        )
                        
                        with gr.Column(scale=1):
                            new_chat_name = gr.Textbox(
                                placeholder="New chat name",
                                label="Create New Chat",
                                container=True
                            )
                            with gr.Row():
                                create_chat_btn = gr.Button("Create", variant="secondary")
                                clear_chat_btn = gr.Button("Clear Current Chat", variant="secondary")
                
                with gr.Column(scale=1):
                    # HuggingFace Token Input
                    gr.Markdown("### HuggingFace Authentication")
                    hf_token_input = gr.Textbox(
                        label="HuggingFace Access Token",
                        placeholder="Enter your HF token (hf_xxx...)",
                        type="password",
                        info="Required to download the Mistral-7B model"
                    )
                    set_token_btn = gr.Button("Set Token", variant="secondary")
                    token_status = gr.Textbox(
                        label="Token Status", 
                        value="No token set", 
                        interactive=False,
                        lines=1
                    )
                    
                    # Model Loading and Settings
                    gr.Markdown("### Model Loading")
                    with gr.Row():
                        load_model_btn = gr.Button("Load Mistral-7B Model", variant="primary")
                        use_fallback_btn = gr.Button("Use Simple JSON Mode", variant="secondary")
                    
                    model_status = gr.Textbox(label="Model Status", value="Not loaded", interactive=False)
                    
                    # Function to toggle fallback mode
                    def toggle_fallback_mode(state):
                        """Flip the fallback flag (no-op when the model is loaded).

                        Returns the new state plus a status string for the UI.
                        """
                        global MODEL_LOADED
                        if MODEL_LOADED:
                            return state, "Model is already loaded. No need for fallback mode."
                        else:
                            return not state, "Using simple JSON generation mode. Limited functionality but no model loading required."
                    
                    # System Prompt
                    system_prompt_input = gr.Textbox(
                        label="System Prompt", 
                        value=SYSTEM_PROMPT,
                        lines=4,
                        placeholder="Enter system prompt to guide the AI's behavior..."
                    )
                    update_prompt_btn = gr.Button("Update System Prompt", variant="secondary")
                    
                    # Model Parameters
                    # Sliders are seeded from GENERATE_CONFIG so the UI
                    # reflects the current generation settings.
                    gr.Markdown("### Generation Parameters")
                    temperature = gr.Slider(
                        minimum=0.1, 
                        maximum=2.0, 
                        value=GENERATE_CONFIG["temperature"], 
                        step=0.1,
                        label="Temperature"
                    )
                    max_tokens = gr.Slider(
                        minimum=64, 
                        maximum=2048, 
                        value=GENERATE_CONFIG["max_new_tokens"], 
                        step=64,
                        label="Max Tokens"
                    )
                    top_p = gr.Slider(
                        minimum=0.1, 
                        maximum=1.0, 
                        value=GENERATE_CONFIG["top_p"], 
                        step=0.05,
                        label="Top P"
                    )
                    rep_penalty = gr.Slider(
                        minimum=1.0, 
                        maximum=2.0, 
                        value=GENERATE_CONFIG["repetition_penalty"], 
                        step=0.1,
                        label="Repetition Penalty"
                    )
                    update_params_btn = gr.Button("Update Parameters", variant="secondary")
        
        with gr.Tab("File Analysis & JSON Conversion"):
            with gr.Row():
                with gr.Column(scale=1):
                    file_upload = gr.File(label="Upload a file to analyze")
                    analyze_btn = gr.Button("Analyze File", variant="primary")
                    convert_json_btn = gr.Button("Convert to n8n JSON", variant="primary")
                
                with gr.Column(scale=2):
                    with gr.Tabs():
                        with gr.TabItem("File Analysis"):
                            file_analysis_output = gr.Markdown(label="File Analysis Results")
                        with gr.TabItem("n8n JSON Output"):
                            n8n_json_output = gr.Code(
                                language="json",
                                label="n8n Compatible JSON",
                                lines=20
                            )
        
        # Static reference tab: markdown only, no event wiring.
        with gr.Tab("JSON Formatting Guide"):
            gr.Markdown("""
            # n8n JSON Formatting Guide
            
            This tab provides guidance on creating well-structured JSON for n8n workflows.
            
            ## Basic n8n Workflow Structure
            
            ```json
            {
              "name": "My Workflow",
              "nodes": [
                {
                  "parameters": { /* Node-specific parameters */ },
                  "id": "1",
                  "name": "Start Node",
                  "type": "n8n-nodes-base.some-node-type",
                  "typeVersion": 1,
                  "position": [250, 300]
                }
                // Additional nodes...
              ],
              "connections": {
                "Start Node": {
                  "main": [
                    [
                      {
                        "node": "Second Node",
                        "type": "main",
                        "index": 0
                      }
                    ]
                  ]
                }
                // Additional connections...
              }
            }
            ```
            
            ## Common n8n Node Types
            
            ### HTTP Request Node
            ```json
            {
              "parameters": {
                "url": "https://api.example.com/data",
                "method": "GET",
                "authentication": "none",
                "sendHeaders": true,
                "headerParameters": {
                  "parameters": [
                    {
                      "name": "Content-Type",
                      "value": "application/json"
                    }
                  ]
                }
              },
              "name": "HTTP Request",
              "type": "n8n-nodes-base.httpRequest",
              "typeVersion": 1,
              "position": [250, 300],
              "id": "1"
            }
            ```
            
            ### Function Node (JavaScript)
            ```json
            {
              "parameters": {
                "functionCode": "// Code here\nreturn items;"
              },
              "name": "Function",
              "type": "n8n-nodes-base.function",
              "typeVersion": 1,
              "position": [450, 300],
              "id": "2"
            }
            ```
            
            ### Set Node (Manual Data)
            ```json
            {
              "parameters": {
                "values": {
                  "string": [
                    {
                      "name": "fieldName",
                      "value": "value"
                    }
                  ],
                  "number": [
                    {
                      "name": "count",
                      "value": 42
                    }
                  ]
                }
              },
              "name": "Set",
              "type": "n8n-nodes-base.set",
              "typeVersion": 1,
              "position": [650, 300],
              "id": "3"
            }
            ```
            """)
            
            gr.Markdown("""
            ## Tips for Creating n8n-Compatible JSON
            
            1. Ensure all JSON keys and values are properly quoted
            2. Use proper nesting for workflow components
            3. Define unique IDs for each node
            4. Properly define connections between nodes
            5. Include all required parameters for each node type
            
            Use the chat interface to ask for specific n8n node configurations or workflow patterns.
            """)
        
        # Set up event handlers
        # Modify to handle fallback mode
        def handle_message(message, chat_history, is_fallback):
            """Route a chat turn to the canned responder or the real model."""
            if is_fallback:
                return simple_fallback_response(message, chat_history)
            else:
                return generate_response(message, chat_history)
                
        # NOTE(review): the msg textbox is not cleared after sending — the typed
        # text stays in the input box. Confirm whether that is intended.
        send_btn.click(
            handle_message, 
            inputs=[msg, chatbot, fallback_mode], 
            outputs=chatbot, 
            api_name="chat"
        )
        
        # Enter key mirrors the Send button (hidden from the API).
        msg.submit(
            handle_message, 
            inputs=[msg, chatbot, fallback_mode], 
            outputs=chatbot,
            api_name=False
        )
        
        load_model_btn.click(
            load_model_button, 
            outputs=model_status, 
            api_name="load_model"
        )
        
        use_fallback_btn.click(
            toggle_fallback_mode,
            inputs=[fallback_mode],
            outputs=[fallback_mode, model_status],
            api_name="fallback_mode"
        )
        
        update_prompt_btn.click(
            update_system_prompt, 
            inputs=system_prompt_input, 
            outputs=model_status, 
            api_name="update_prompt"
        )
        
        update_params_btn.click(
            update_generation_params,
            inputs=[temperature, max_tokens, top_p, rep_penalty],
            outputs=model_status,
            api_name="update_params"
        )
        
        # Two-step: analyze first, then render the stored summary.
        analyze_btn.click(
            analyze_uploaded_file,
            inputs=file_upload,
            outputs=model_status,
            api_name="analyze_file"
        ).then(
            display_file_info,
            outputs=file_analysis_output,
            api_name=False
        )
        
        convert_json_btn.click(
            convert_to_n8n_json,
            outputs=n8n_json_output,
            api_name="convert_to_n8n"
        )
        
        chat_selector.change(
            select_chat,
            inputs=chat_selector,
            outputs=model_status,
            api_name="select_chat"
        )
        
        # NOTE(review): the .then() step returns a plain list to a Dropdown
        # output; newer Gradio versions may expect gr.update(choices=...) to
        # refresh the choices — verify against the installed Gradio version.
        create_chat_btn.click(
            create_new_chat,
            inputs=new_chat_name,
            outputs=model_status,
            api_name="create_chat"
        ).then(
            get_available_chats,
            outputs=chat_selector,
            api_name=False
        )
        
        clear_chat_btn.click(
            clear_current_chat,
            outputs=model_status,
            api_name="clear_chat"
        )
        
        set_token_btn.click(
            set_hf_token,
            inputs=hf_token_input,
            outputs=token_status,
            api_name="set_token"
        )
        
        # Initialize empty chatbot
        chatbot.value = []

    return app

# Launch the app
# Build the interface at import time; keeping `demo` at module level lets
# hosting platforms (e.g. HuggingFace Spaces) discover it — TODO confirm
# that is the deployment target.
demo = create_gradio_interface()

if __name__ == "__main__":
    demo.launch()