Adi362 committed on
Commit
e997190
·
verified ·
1 Parent(s): 3ebf53f

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +138 -0
main.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uvicorn
3
+ from fastapi import FastAPI, Request, HTTPException
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from pydantic import BaseModel
6
+ import httpx
7
+ from duckduckgo_search import DDGS
8
+
9
# FastAPI application exposing the situation-aware chat pipeline.
app = FastAPI(title="Edyx Situation Aware AI Pipeline")

# Allow requests from the Edyx gateway/frontend.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive CORS posture — confirm this is intended for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
19
+
20
class ChatRequest(BaseModel):
    """Request payload for the /chat/completions endpoint."""
    # The current user message to answer.
    message: str
    # Optional prior conversation turns; each item is expected to be an
    # OpenAI-style {"role": ..., "content": ...} dict (see the endpoint's
    # use of msg.get("role")/msg.get("content")). Defaults to no history.
    messages: list = []
23
+
24
# Groq API key, read once at import time. When unset, search routing is
# skipped (evaluate_needs_search returns False) and /chat/completions
# responds with HTTP 500.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Fallback check - if we were actually deploying on HF with a local GGUF,
# we would load llama-cpp-python here. For this stage, we'll setup the Groq primary pipeline.
28
+
29
async def evaluate_needs_search(query: str) -> bool:
    """Decide whether *query* needs live web data.

    Asks a small, fast Groq model to answer YES/NO. Returns True only when
    the model's reply contains "YES"; returns False when no API key is
    configured or when the request fails for any reason (best-effort
    routing — on error we skip search to save latency).
    """
    if not GROQ_API_KEY:
        return False

    system_prompt = """You are a highly efficient classification router.
    Determine if the user's query requires up-to-date, real-time information or current events data from the internet to answer accurately.
    Respond ONLY with "YES" if it requires search, or "NO" if it can be answered with general knowledge up to 2023.
    DO NOT provide any other text."""

    # Build the request payload up front so the network section stays small.
    payload = {
        "model": "llama3-8b-8192",  # Fast and cheap for routing
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        "temperature": 0.1,
        "max_tokens": 10,
    }

    try:
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
                json=payload,
                timeout=10.0,
            )
            resp.raise_for_status()
            verdict = resp.json()['choices'][0]['message']['content'].strip().upper()
        return "YES" in verdict
    except Exception as e:
        print(f"Routing evaluation error: {e}")
        return False  # Default to no search on error to save latency
62
+
63
def perform_search(query: str, max_results: int = 3) -> str:
    """Perform a web search using DuckDuckGo.

    Args:
        query: Free-text search query.
        max_results: Maximum number of results to fold into the context.

    Returns:
        A context string summarising the top results, or a short fallback
        sentence when nothing was found or the search failed (this function
        never raises — failures are reported as text so the caller can still
        answer without live data).
    """
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=max_results))

        if not results:
            return "No recent information found."

        # Assemble with str.join instead of repeated += concatenation.
        header = "Here is some current information I found on the web regarding the user's query:\n\n"
        sources = [
            f"Source {i} [{r.get('title', 'No Title')}]: {r.get('body', '')}\n"
            for i, r in enumerate(results, start=1)
        ]
        return header + "".join(sources)
    except Exception as e:
        print(f"Search error: {e}")
        return "Search failed or was blocked."
79
+
80
def _build_messages(system_base: str, history: list, user_query: str) -> list:
    """Assemble the OpenAI-style message array: system prompt, prior turns,
    then the current query (appended only if it is not already the last turn)."""
    final_messages = [{"role": "system", "content": system_base}]
    for msg in history:
        final_messages.append({"role": msg.get("role", "user"), "content": msg.get("content", "")})
    # Ensure current query is at the end if not provided in history block
    if not history or history[-1].get("content") != user_query:
        final_messages.append({"role": "user", "content": user_query})
    return final_messages


@app.post("/chat/completions")
async def situation_aware_chat(request: ChatRequest):
    """Situation-aware chat completion endpoint.

    Pipeline:
      1. Route: ask a small model whether the query needs live web data.
      2. If so, fetch DuckDuckGo results and inject them into the system prompt.
      3. Answer with the primary Groq model, preserving conversation history.

    Returns:
        The raw Groq chat-completion response JSON.

    Raises:
        HTTPException: 500 when GROQ_API_KEY is missing; 503 when the
            primary LLM call fails.
    """
    if not GROQ_API_KEY:
        raise HTTPException(status_code=500, detail="GROQ_API_KEY is not set in the environment.")

    # 1. Evaluate if search is needed
    user_query = request.message
    needs_search = await evaluate_needs_search(user_query)

    context_injection = ""
    if needs_search:
        print(f"Query '{user_query}' requires search. Fetching data...")
        context_injection = perform_search(user_query)
        print("Search complete.")

    # 2. Prepare the final prompt
    system_base = "You are 'Situation Aware AI', an advanced assistant integrated into the Edyx platform."
    if context_injection:
        system_base += "\n\nThe user has asked a question that requires current knowledge. You have been provided with real-time web search results below. Incorporate this information seamlessly into your answer to provide the most up-to-date and accurate response. Do not mention that you 'searched the web' unless asked, just present the facts.\n\n" + context_injection

    # Construct message array preserving history
    final_messages = _build_messages(system_base, request.messages, user_query)

    # 3. Call Primary LLM
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
                json={
                    "model": "llama3-70b-8192",
                    "messages": final_messages,
                    "temperature": 0.5,
                    "max_tokens": 4096,
                },
                timeout=30.0,
            )
            response.raise_for_status()
            return response.json()
    except Exception as e:
        print(f"Primary LLM Error: {e}")
        # Here we would fallback to `llama-cpp-python` local inference.
        # Chain the original cause so server logs show what actually failed.
        raise HTTPException(status_code=503, detail="Primary AI service is currently unavailable.") from e
132
+
133
@app.get("/health")
def health_check():
    """Liveness probe for the deployment platform."""
    payload = {"status": "ok", "service": "edyx-situation-aware-pipeline"}
    return payload
136
+
137
# Local/dev entry point; reload=True enables auto-restart on code changes,
# so production deployments should launch uvicorn directly instead.
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)