vkrishnan569 committed
Commit 0d8eb3a · verified · 1 Parent(s): 0b6c0ae

Delete main.py

Files changed (1)
  1. main.py +0 -91
main.py DELETED
@@ -1,91 +0,0 @@
- from flask import Flask, request, jsonify
- from llama_cpp import Llama
- from huggingface_hub import hf_hub_download
- from model import model_download
- model_download()
-
- # Initialize the Llama model with chat format set to "llama-2"
- llm = Llama(model_path="./llama-2-7b-chat.Q2_K.gguf", chat_format="llama-2")
-
- # Define the system prompt
- system_prompt = (
-     "I am an Indian law chatbot designed to provide legal support to marginalized communities. "
-     "This model was fine-tuned by Sathish and his team members at the University College of Engineering Dindigul. "
-     "The model has been trained on various legal topics. "
-     "Feel free to ask questions."
- )
-
- # Initialize the conversation history list with the system prompt
- conversation_history = [{"role": "system", "content": system_prompt}]
-
- # Create a Flask application
- app = Flask(__name__)
-
- # Define the model function
- def model(query):
-     global conversation_history  # Declare global to update the history
-
-     # Add the user's query to the conversation history
-     conversation_history.append({"role": "user", "content": query})
-
-     # Estimate the total number of tokens in the conversation history
-     # (a whitespace split is only a rough estimate; use the model's tokenizer for an exact count)
-     total_tokens = sum(len(message["content"].split()) for message in conversation_history)
-
-     # If the total exceeds the model's context window, trim the history
-     # (adjust the 512 value to match your model's actual context window size)
-     context_window_size = 512
-     while total_tokens > context_window_size and len(conversation_history) > 1:
-         # Drop the oldest non-system message so the system prompt is preserved
-         conversation_history.pop(1)
-         # Recalculate the total number of tokens
-         total_tokens = sum(len(message["content"].split()) for message in conversation_history)
-
-     # Generate a chat completion from the conversation history
-     response = llm.create_chat_completion(messages=conversation_history, max_tokens=75)
-
-     # Extract the assistant's response from the completion dictionary
-     if response and 'choices' in response and response['choices']:
-         assistant_response = response['choices'][0]['message']['content']
-         assistant_response = assistant_response.strip()
-
-         # Add the assistant's response to the conversation history
-         conversation_history.append({"role": "assistant", "content": assistant_response})
-
-         # Print the assistant's response
-         print("Assistant response:", assistant_response)
-
-         # Return the assistant's response
-         return assistant_response
-     else:
-         print("Error: Invalid response structure.")
-         return None
-
-
- # Define the endpoint for the API
- @app.route("/chat", methods=["GET"])
- def chat_endpoint():
-     # Get the query parameter from the request
-     query = request.args.get("query")
-
-     # Check if the "refresh" parameter is set to "true"
-     refresh = request.args.get("refresh")
-     if refresh and refresh.lower() == "true":
-         # Clear the conversation history
-         global conversation_history
-         conversation_history = [{"role": "system", "content": system_prompt}]
-         return jsonify({"response": "Conversation history cleared."})
-
-     # If there is no query, return an error message
-     if not query:
-         return jsonify({"error": "Query parameter is required."}), 400
-
-     # Call the model function with the query
-     response = model(query)
-
-     # Return the assistant's response as JSON
-     return jsonify({"response": response})
-
- # Run the Flask app
- if __name__ == "__main__":
-     app.run(host="0.0.0.0", port=5000)
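
For context, here is a minimal sketch of how the deleted /chat endpoint could have been exercised while the service was running. It assumes the Flask app above is reachable at http://localhost:5000 and uses the third-party requests library; this client code is illustrative only and was never part of the repository.

import requests

BASE_URL = "http://localhost:5000/chat"  # assumes the app above is running locally

# Ask a question; the server keeps the conversation history between calls.
reply = requests.get(BASE_URL, params={"query": "What are my rights if I am arrested?"})
print(reply.json())  # e.g. {"response": "..."}

# Reset the server-side conversation history.
reset = requests.get(BASE_URL, params={"refresh": "true"})
print(reset.json())  # {"response": "Conversation history cleared."}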