Spaces:
Running
Running
"""
You call this ENDPOINT and it returns you a JSON which is of this format:
POST FORMAT: (/api/groq or /api/google or /api/ollama ...)
{
    "query": "????",
    "llm": "llama70b-whatever",
    "knn": "3",
    "stream": false
}
RESPONSE FORMAT:
{
    "response": "blabla",
    "references": "1, 2, 3"
}
"""
# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)
from flask import Flask
from flask import request
# Project-local helpers: embed a query, run KNN retrieval, and call each LLM backend.
from utils import embedding_output, db_output, groq_llm_output, ollama_llm_output, google_llm_output

# WSGI application instance for this service.
app = Flask(__name__)
# NOTE(review): no @app.route was present in the source, yet the curl examples
# below target POST /api/groq/generate — without registration this handler is
# unreachable. Confirm routes aren't registered elsewhere (e.g. add_url_rule).
@app.route("/api/groq/generate", methods=["POST"])
def groq_completion():
    """Answer a query via a Groq-hosted LLM, grounded on KNN-retrieved context.

    Expects a JSON body {"query": str, "llm": str, "knn": int-like,
    "stream": bool-like} and returns {"response": ..., "references": ...}.
    Raises KeyError (-> HTTP 500) if a required field is missing.
    """
    message = request.get_json()
    query: str = message["query"]
    llm: str = message["llm"]
    knn: int = int(message["knn"])
    # JSON booleans arrive as bool; tolerate "true"/"false"/"1" strings too,
    # since bool("false") would otherwise be truthy.
    raw_stream = message["stream"]
    stream: bool = (
        raw_stream
        if isinstance(raw_stream, bool)
        else str(raw_stream).strip().lower() in ("true", "1")
    )
    # Embed the query, fetch the k nearest documents, then ask the LLM.
    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = groq_llm_output(query, db_knn, llm, stream)
    return {
        "response": output,
        "references": references,
    }
# NOTE(review): no @app.route was present in the source, yet the curl examples
# below target POST /api/ollama/generate — without registration this handler is
# unreachable. Confirm routes aren't registered elsewhere (e.g. add_url_rule).
@app.route("/api/ollama/generate", methods=["POST"])
def ollama_completion():
    """Answer a query via a local Ollama LLM, grounded on KNN-retrieved context.

    Expects a JSON body {"query": str, "llm": str, "knn": int-like,
    "stream": bool-like}. Returns {"response": ..., "references": ...};
    if the Ollama backend reports an error, returns a generic error message
    instead of failing the request.
    """
    message = request.get_json()
    query: str = message["query"]
    llm: str = message["llm"]
    knn: int = int(message["knn"])
    # JSON booleans arrive as bool; tolerate "true"/"false"/"1" strings too,
    # since bool("false") would otherwise be truthy.
    raw_stream = message["stream"]
    stream: bool = (
        raw_stream
        if isinstance(raw_stream, bool)
        else str(raw_stream).strip().lower() in ("true", "1")
    )
    # Embed the query, fetch the k nearest documents, then ask the LLM.
    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = ollama_llm_output(query, db_knn, llm, stream)
    if response_json.get("error"):
        # Best-effort: surface the failure to the client, log the raw payload.
        print(response_json)
        return {
            "response": "An error occurred, try again.",
            "references": "No references",
        }
    return {
        "response": response_json["response"],
        "references": references,
    }
# NOTE(review): no @app.route was present in the source; by analogy with the
# groq/ollama endpoints this should serve POST /api/google/generate. Confirm
# routes aren't registered elsewhere (e.g. add_url_rule).
@app.route("/api/google/generate", methods=["POST"])
def google_completion():
    """Answer a query via a Google-hosted LLM, grounded on KNN-retrieved context.

    Expects a JSON body {"query": str, "llm": str, "knn": int-like,
    "stream": bool-like} and returns {"response": ..., "references": ...}.
    Raises KeyError (-> HTTP 500) if a required field is missing.
    """
    message = request.get_json()
    query: str = message["query"]
    llm: str = message["llm"]
    knn: int = int(message["knn"])
    # JSON booleans arrive as bool; tolerate "true"/"false"/"1" strings too,
    # since bool("false") would otherwise be truthy.
    raw_stream = message["stream"]
    stream: bool = (
        raw_stream
        if isinstance(raw_stream, bool)
        else str(raw_stream).strip().lower() in ("true", "1")
    )
    # Embed the query, fetch the k nearest documents, then ask the LLM.
    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = google_llm_output(query, db_knn, llm, stream)
    return {
        "response": response_json,
        "references": references,
    }
| """ | |
| curl -X POST http://localhost:8000/api/groq/generate -H "Content-Type: application/json" -d '{ | |
| "query": "How do I create a sphere in FURY?", | |
| "llm": "llama3-70b-8192", | |
| "knn": "3", | |
| "stream": false | |
| }' | |
| curl -X POST http://localhost:8000/api/ollama/generate -H "Content-Type: application/json" -d '{ | |
| "query": "How do I create a sphere in FURY?", | |
| "llm": "phi3", | |
| "knn": "3", | |
| "stream": false | |
| }' | |
| """ | |