sujoy0011 committed on
Commit
a66a74f
·
verified ·
1 Parent(s): 09b8662

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. main.py +296 -0
  3. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ FROM python:3.11-slim
3
+
4
+ WORKDIR /code
5
+
6
+ COPY ./requirements.txt /code/requirements.txt
7
+ RUN pip install --no-cache-dir --upgrade pip -r /code/requirements.txt
8
+
9
+ COPY . /code
10
+
11
+ EXPOSE 7860
12
+
13
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "debug"]
main.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Imports ---
import asyncio
import io
import os

from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# --- Dotenv Import ---
from dotenv import load_dotenv
# --- Other Imports ---
from openai import AsyncOpenAI, OpenAIError
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError as GoogleHttpError
from langdetect import detect, LangDetectException
from deep_translator import GoogleTranslator, exceptions as TranslatorExceptions
15
+
16
# --- Load Environment Variables ---
# Merge values from a local .env file (when one exists) into the process environment.
load_dotenv()
print("Attempted to load environment variables from .env file.")

# --- Retrieve Environment Variables ---
# Both API keys are optional; the features that need them degrade when a key is absent.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
APP_NAME = os.getenv("APP_NAME", "Multilingual Scam Detector")

# --- API Client Initialization & Checks ---
# `client` remains None whenever OpenAI cannot be used; the rest of the module
# tests it for truthiness before making any OpenAI call.
client = None
if not OPENAI_API_KEY:
    print("\n*** WARNING: OPENAI_API_KEY not found in environment/.env. OpenAI features disabled. ***\n")
else:
    try:
        client = AsyncOpenAI(api_key=OPENAI_API_KEY)
    except OpenAIError as init_error:
        print(f"Failed to initialize OpenAI client: {init_error}")
        client = None
    else:
        print("OpenAI client initialized.")

if not GOOGLE_API_KEY:
    print("\n*** WARNING: GOOGLE_API_KEY not found in environment/.env. Google Safe Browsing checks disabled. ***\n")
40
+
41
# --- FastAPI App Initialization ---
app = FastAPI(
    title=APP_NAME,
    description="Analyzes text, URLs, and audio for potential scams using AI and external APIs.",
    version="1.1.0",
)

# --- CORS Middleware ---
# The API is intentionally callable from any origin. Credentialed CORS is turned
# off because a wildcard origin must not be combined with credentials: browsers
# reject `Access-Control-Allow-Origin: *` on credentialed requests, and reflecting
# arbitrary origins with credentials enabled would let any site make
# authenticated calls. None of the endpoints in this file read cookies or
# auth headers, so nothing relies on credentials being allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
56
+
57
+ # --- Pydantic Models (Remain the same) ---
58
class AnalysisResponse(BaseModel):
    # Response body of POST /analyze.
    # Verdict label; the code in this file produces "Safe", "Suspicious", "Scam" or "Error".
    status: str
    # Human-readable explanation of the verdict (or of the failure when status is "Error").
    reason: str
    # Language code reported by language detection; None when detection did not run or failed.
    detected_language: str | None = None
    # True when the input was translated to English before analysis.
    is_translated: bool = False
63
+
64
class TTSRequest(BaseModel):
    # Request body of POST /synthesize.
    # Text to be converted to speech.
    text: str
    # Language hint supplied by the caller; defaults to English.
    language: str | None = "en"
67
+
68
+ # --- Helper Functions (detect_and_translate, translate_reason_back - Remain the same) ---
69
def _detect_and_translate_blocking(text: str) -> tuple[str, str | None, bool]:
    """Synchronous worker behind detect_and_translate (language detection + translation)."""
    detected_lang = None
    try:
        detected_lang = detect(text)
        if detected_lang == 'en':
            print("Detected language: English. No translation needed.")
            return text, 'en', False

        print(f"Detected language: {detected_lang}. Translating to English...")
        translator = GoogleTranslator(source=detected_lang, target='en')
        translated_text = translator.translate(text=text)
        if translated_text:
            print("Translation successful.")
            return translated_text, detected_lang, True

        print("Translation returned empty result. Using original text.")
        return text, detected_lang, False
    except LangDetectException:
        print("Language detection failed. Assuming English.")
        return text, None, False
    except TranslatorExceptions.TranslationNotFound:
        print(f"Translation engine could not find translation for language '{detected_lang}'. Using original text.")
        return text, detected_lang, False
    except Exception as e:
        # Best effort by design: any translation failure falls back to the original text.
        print(f"Translation error ({type(e).__name__}): {e}. Using original text.")
        return text, detected_lang, False


async def detect_and_translate(text: str) -> tuple[str, str | None, bool]:
    """Detect the language of *text* and translate it to English when needed.

    Detection and translation are blocking calls, so they are executed in a
    worker thread to keep the event loop free for other requests.

    Returns:
        A tuple ``(text_for_analysis, detected_language, is_translated)``.
        ``text_for_analysis`` is the English translation when one was produced,
        otherwise the original text. ``detected_language`` is None when
        detection failed. This function never raises; every failure falls back
        to the original text.
    """
    return await asyncio.to_thread(_detect_and_translate_blocking, text)
96
+
97
+ async def translate_reason_back(reason: str, target_lang: str | None) -> str:
98
+ if target_lang and target_lang != 'en':
99
+ try:
100
+ print(f"Translating reason back to: {target_lang}")
101
+ translator = GoogleTranslator(source='en', target=target_lang)
102
+ translated_reason = translator.translate(text=reason)
103
+ return translated_reason or reason
104
+ except Exception as e:
105
+ print(f"Failed to translate reason back to {target_lang}: {e}")
106
+ return reason
107
+ return reason
108
+
109
+
110
+ # --- Core API Call Functions (Updated to use os.getenv results) ---
111
+
112
def _query_safe_browsing(url: str) -> dict:
    """Run one blocking Safe Browsing v4 ``threatMatches.find`` lookup for *url*."""
    service = build("safebrowsing", "v4", developerKey=GOOGLE_API_KEY, static_discovery=False)
    threat_info = {
        'threatTypes': ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE", "POTENTIALLY_HARMFUL_APPLICATION", "THREAT_TYPE_UNSPECIFIED"],
        'platformTypes': ["ANY_PLATFORM"],
        'threatEntryTypes': ["URL"],
        'threatEntries': [{'url': url}],
    }
    body = {
        'client': {'clientId': APP_NAME.replace(" ", "-").lower(), 'clientVersion': "1.1.0"},
        'threatInfo': threat_info,
    }
    return service.threatMatches().find(body=body).execute()


async def check_url_safety(url: str) -> dict[str, str]:
    """Checks URL safety using Google Safe Browsing API V4.

    Returns a dict with ``status`` ("Safe", "Scam" or "Error") and ``reason``.
    When GOOGLE_API_KEY is not configured a keyword-based mock verdict is
    returned instead. API failures are reported as an "Error" result rather
    than raised.
    """
    print(f"Checking URL safety for: {url}")
    if not GOOGLE_API_KEY:
        print("Warning: GOOGLE_API_KEY not configured. Mock response for URL check.")
        if "bad-link" in url or "malware.testing.google" in url:
            return {"status": "Scam", "reason": "URL flagged (Mock - API Key Missing)"}
        return {"status": "Safe", "reason": "URL appears safe (Mock - API Key Missing)"}

    try:
        # The Google client is synchronous; run it in a worker thread so the
        # event loop is not blocked while the HTTP round trips complete.
        response = await asyncio.to_thread(_query_safe_browsing, url)

        matches = response.get('matches')
        if matches:
            threat_types = ", ".join(sorted({match['threatType'] for match in matches}))
            print(f"URL Found in Safe Browsing: {threat_types}")
            return {"status": "Scam", "reason": f"URL flagged by Google Safe Browsing for: {threat_types}"}

        print("URL not found in Safe Browsing database.")
        return {"status": "Safe", "reason": "URL not flagged by Google Safe Browsing."}
    except GoogleHttpError as e:
        status = e.resp.status
        # The string form of HttpError embeds the request URI, which carries the
        # API key as a query parameter. Neither log nor return it verbatim.
        print(f"Google Safe Browsing API HTTP Error: status={status} reason={getattr(e, 'reason', '')}")
        if status == 400:
            return {"status": "Error", "reason": "Safe Browsing API request failed (Bad Request)."}
        if status == 403:
            return {"status": "Error", "reason": "Safe Browsing API request failed (Permission Denied/Invalid Key?)."}
        return {"status": "Error", "reason": f"Could not verify URL via Safe Browsing (HTTP Error {status})."}
    except Exception as e:
        print(f"Generic Google Safe Browsing API Error: {type(e).__name__}: {e}")
        return {"status": "Error", "reason": f"Could not verify URL via Safe Browsing (General Error): {type(e).__name__}"}
151
+
152
+
153
async def transcribe_audio(audio_file: UploadFile) -> str:
    """Transcribes audio using OpenAI Whisper API.

    Returns:
        The transcription text.

    Raises:
        HTTPException: 503 when the OpenAI client is not initialized, 400 when
            the uploaded file is empty, 502 when the OpenAI API call fails,
            500 for any other failure.

    The upload is always closed before returning or raising.
    """
    if not client:
        raise HTTPException(status_code=503, detail="Audio transcription unavailable: OpenAI client not initialized (check API key).")

    print(f"Transcribing audio file: {audio_file.filename} (size: {audio_file.size})")
    try:
        audio_bytes = await audio_file.read()
        if not audio_bytes:
            # An empty upload is a client mistake, not a server fault.
            raise HTTPException(status_code=400, detail="Received empty audio file.")

        audio_file_like = io.BytesIO(audio_bytes)
        # A filename is attached so the upload carries an extension for format detection.
        audio_file_like.name = audio_file.filename or "audio.mp3"

        transcription = await client.audio.transcriptions.create(
            model="whisper-1", file=audio_file_like
        )
        print(f"Transcription successful: {transcription.text[:100]}...")
        return transcription.text
    except HTTPException:
        # Preserve deliberate HTTP errors instead of re-wrapping them as 500.
        raise
    except OpenAIError as e:
        print(f"OpenAI API Error during transcription: {e}")
        # Only some OpenAIError subclasses carry status_code/body (e.g. connection
        # errors do not), so read them defensively.
        api_status = getattr(e, "status_code", None)
        api_body = getattr(e, "body", None)
        raise HTTPException(status_code=502, detail=f"Audio transcription failed (API Error): {api_status} {api_body}") from e
    except Exception as e:
        print(f"Error during transcription: {e}")
        raise HTTPException(status_code=500, detail=f"Audio transcription failed (Server Error): {e}") from e
    finally:
        await audio_file.close()
179
+
180
+
181
_SCAM_SYSTEM_PROMPT = """
You are an AI assistant specialized in detecting scams, phishing attempts, and fraudulent content within text messages, including those transcribed from audio. Your goal is to classify the input text into one of three categories: "Safe", "Suspicious", or "Scam". Provide a concise reason for your classification, maximum 1-2 sentences.
Consider these factors: Urgency, Generic Greetings, Requests for Personal Information, Suspicious Links, Unsolicited Offers/Prizes, Grammatical Errors/Typos, Unusual Payment Methods, Threats or Blackmail, Impersonation, Investment Scams, Job Scams, Romance Scams.
Output Format: Provide the classification FIRST, followed by a colon, then a BRIEF reason.
Example 1: Scam: Contains a suspicious link and requests urgent login verification, typical of phishing.
Example 2: Suspicious: Unsolicited job offer with vague details asking for personal info upfront.
Example 3: Safe: Appears to be a standard appointment reminder or casual conversation.
"""

_VALID_STATUSES = ("Safe", "Suspicious", "Scam")

# Keyword groups for the fallback inference, checked in this order (first hit wins).
_FALLBACK_KEYWORDS = (
    ("Scam", ("scam", "phishing", "fraud", "malicious")),
    ("Suspicious", ("suspicious", "warning", "risk", "caution", "unsolicited")),
    ("Safe", ("safe", "legitimate", "benign")),
)


def _parse_llm_verdict(analysis_result: str) -> tuple[str, str]:
    """Split a model reply of the form "<Status>: <reason>" into (status, reason).

    When the reply has no recognizable status prefix, the status is inferred
    from keywords in the reply and defaults to "Suspicious"; the whole reply is
    then used as the reason.
    """
    head, separator, tail = analysis_result.partition(":")
    # Tolerate light decoration around the label such as **Scam** or "Scam".
    candidate = head.strip(" *`\"'").capitalize()
    if separator and candidate in _VALID_STATUSES:
        reason = tail.strip().strip("*").strip()
        return candidate, reason or "Analysis complete."

    status = "Suspicious"
    analysis_lower = analysis_result.lower()
    for label, keywords in _FALLBACK_KEYWORDS:
        if any(word in analysis_lower for word in keywords):
            status = label
            break
    return status, analysis_result or "Analysis complete."


async def check_text_scam(text: str) -> dict[str, str]:
    """Analyzes text for scams using OpenAI GPT and a specific prompt.

    Returns a dict with ``status`` ("Safe", "Suspicious", "Scam" or "Error")
    and ``reason``. When the OpenAI client is not initialized a keyword-based
    mock verdict is returned. API failures are reported as an "Error" result
    rather than raised.
    """
    if not client:
        print("Warning: OpenAI client not initialized. Using mock analysis for text check.")
        lowered = text.lower()
        if "won" in lowered and "click" in lowered:
            return {"status": "Scam", "reason": "Potential prize scam (Mock - API Key Missing)"}
        if "urgent" in lowered and "verify" in lowered:
            return {"status": "Suspicious", "reason": "Urgency/Phishing tactic (Mock - API Key Missing)"}
        return {"status": "Safe", "reason": "Text appears safe (Mock - API Key Missing)"}

    print(f"Analyzing text with LLM: {text[:60]}...")
    try:
        response = await client.chat.completions.create(
            model="gpt-4-turbo",  # Or gpt-3.5-turbo
            messages=[
                {"role": "system", "content": _SCAM_SYSTEM_PROMPT},
                {"role": "user", "content": text},
            ],
            max_tokens=100,
            temperature=0.2,
        )
        # message.content may be None; treat that like an empty reply.
        analysis_result = (response.choices[0].message.content or "").strip()
        print(f"LLM Analysis Result: {analysis_result}")
        if not analysis_result:
            return {"status": "Error", "reason": "LLM analysis failed (Server Error): empty response from model."}

        status, reason = _parse_llm_verdict(analysis_result)
        return {"status": status, "reason": reason}
    except OpenAIError as e:
        print(f"OpenAI API Error during text analysis: {e}")
        # Only some OpenAIError subclasses carry status_code/body; reading them
        # unguarded would raise AttributeError out of this handler.
        api_status = getattr(e, "status_code", None)
        api_body = getattr(e, "body", None)
        return {"status": "Error", "reason": f"LLM analysis failed (API Error): {api_status} {api_body}"}
    except Exception as e:
        print(f"Generic error during text analysis: {e}")
        return {"status": "Error", "reason": f"LLM analysis failed (Server Error): {e}"}
228
+
229
+ # --- API Endpoints (/analyze, /synthesize - Remain the same structurally) ---
230
@app.post("/analyze", response_model=AnalysisResponse, tags=["Analysis"])
async def analyze_input(
    input_type: str = Form(..., description="Type of input: 'text', 'url', or 'audio'"),
    text: str | None = Form(None, description="Text message or URL (required if input_type is 'text' or 'url')"),
    file: UploadFile | None = File(None, description="Audio file (required if input_type is 'audio')")
):
    # Dispatches on input_type: URLs go to the Safe Browsing check; text and
    # audio (after transcription) go through language detection/translation and
    # then the LLM scam check. The reason is translated back to the detected
    # language for non-error results.
    detected_language = None
    is_translated = False
    reason_target_language = 'en'

    try:
        if input_type == 'url':
            if not text:
                raise HTTPException(status_code=400, detail="URL required for type 'url'")
            verdict = await check_url_safety(text)
        elif input_type in ('text', 'audio'):
            if input_type == 'text':
                if not text:
                    raise HTTPException(status_code=400, detail="Text required for type 'text'")
                source_text = text
            else:
                if not file:
                    raise HTTPException(status_code=400, detail="File required for type 'audio'")
                source_text = await transcribe_audio(file)
                if not source_text:
                    raise HTTPException(status_code=500, detail="Transcription empty.")

            # Text and audio share the same pipeline once a string is available.
            text_to_analyze, detected_language, is_translated = await detect_and_translate(source_text)
            reason_target_language = detected_language or 'en'
            verdict = await check_text_scam(text_to_analyze)
        else:
            raise HTTPException(status_code=400, detail=f"Invalid input_type '{input_type}'.")

        language_fields = {"detected_language": detected_language, "is_translated": is_translated}

        # Error reasons are returned as-is (not translated back).
        if verdict.get("status") == "Error":
            return AnalysisResponse(status="Error", reason=verdict.get('reason', 'Analysis failed.'), **language_fields)

        english_reason = verdict.get('reason', 'Analysis reason missing.')
        final_reason = await translate_reason_back(english_reason, reason_target_language)
        return AnalysisResponse(status=verdict.get('status', 'Error'), reason=final_reason, **language_fields)
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        print(f"Unexpected error in /analyze endpoint: {e}")
        return AnalysisResponse(status="Error", reason=f"Unexpected server error: {type(e).__name__}", detected_language=None, is_translated=False)
270
+
271
+
272
@app.post("/synthesize", tags=["TTS"])
async def synthesize_speech(request: TTSRequest):
    """Generates speech from text using OpenAI TTS-1 model."""
    # Responds with MP3 bytes (audio/mpeg). Raises HTTPException 503 when the
    # OpenAI client is not initialized, 400 for blank text, 502 when the OpenAI
    # API call fails, and 500 for any other failure.
    if not client:
        raise HTTPException(status_code=503, detail="TTS unavailable: OpenAI client not initialized.")
    if not request.text.strip():
        # Reject blank input up front instead of surfacing it as an upstream API failure.
        raise HTTPException(status_code=400, detail="Text required for speech synthesis.")

    print(f"Synthesizing speech for text: {request.text[:50]}...")
    try:
        voice_model = "alloy"
        response = await client.audio.speech.create(model="tts-1", voice=voice_model, input=request.text, response_format="mp3")
        return Response(content=response.content, media_type="audio/mpeg")
    except OpenAIError as e:
        print(f"OpenAI API Error during TTS: {e}")
        # Only some OpenAIError subclasses carry status_code/body; reading them
        # unguarded would raise AttributeError out of this handler.
        api_status = getattr(e, "status_code", None)
        api_body = getattr(e, "body", None)
        raise HTTPException(status_code=502, detail=f"TTS generation failed (API Error): {api_status} {api_body}") from e
    except Exception as e:
        print(f"TTS Error: {e}")
        raise HTTPException(status_code=500, detail=f"Text-to-Speech generation failed (Server Error): {e}") from e
288
+
289
+
290
@app.get("/", tags=["Health"])
async def read_root():
    # Health probe: reports that the API is up and whether each external
    # integration has been configured.
    openai_ready = bool(OPENAI_API_KEY and client)
    google_ready = bool(GOOGLE_API_KEY)
    return {
        "message": f"{APP_NAME} API is running!",
        "openai_key_status": "OK" if openai_ready else "Not Configured",
        "google_key_status": "OK" if google_ready else "Not Configured",
    }
296
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-dotenv
4
+ httpx
5
+ openai
6
+ google-api-python-client
7
+ langdetect
8
+ deep-translator
9
+ pydantic
10
+ python-multipart