|
|
import gradio as gr |
|
|
from faster_whisper import WhisperModel |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
import torch |
|
|
import requests |
|
|
import base64 |
|
|
import tempfile |
|
|
import os |
|
|
import logging |
|
|
import time |
|
|
from datetime import datetime |
|
|
from html.parser import HTMLParser |
|
|
from fastapi import FastAPI, Request, Query |
|
|
from fastapi.responses import JSONResponse |
|
|
import uvicorn |
|
|
|
|
|
# Root logging config: INFO level with timestamps so the per-stage timing
# messages emitted below (search / generation) can be correlated.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger(__name__)  # module-level logger shared by all handlers


logger.info("Loading models...")
# Speech-to-text model (faster-whisper "tiny"), CPU-only with int8 weights to
# keep memory low. NOTE(review): whisper_model is not referenced anywhere else
# in this file — presumably used by another component; confirm before removing.
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
# Small instruction-tuned chat model used by generate_answer().
model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# CPU-only, float32; low_cpu_mem_usage streams weights to reduce peak RAM
# during load.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True
)
logger.info("Models loaded!")
|
|
|
|
|
def search_parallel(query):
    """Fetch DuckDuckGo HTML search results for *query*.

    Returns a tuple ``(results_text, engine_name)``: on success,
    ``results_text`` is up to two bullet-formatted result titles (each
    truncated to 120 chars) and ``engine_name`` is ``"DuckDuckGo"``.  On any
    failure (network error, timeout, non-200 status, parse error) it falls
    back to ``("No search results", "None")`` — callers never see an
    exception.
    """
    logger.info("[SEARCH] Starting...")
    try:
        response = requests.get(
            'https://html.duckduckgo.com/html/',
            params={'q': query},
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=1.5  # hard cap: search must not stall answer generation
        )
        if response.status_code == 200:

            class DDGParser(HTMLParser):
                """Collect the link text of DuckDuckGo result anchors.

                The HTML endpoint marks result links with the ``result__a``
                class; we accumulate the text contained in those anchors.
                """

                def __init__(self):
                    super().__init__()
                    self.results = []
                    self.in_result = False
                    self.current_text = ""

                def handle_starttag(self, tag, attrs):
                    # Attribute values may be None for bare attributes, so
                    # guard the substring test with `(v or '')`.
                    if tag == 'a' and any(
                        k == 'class' and 'result__a' in (v or '')
                        for k, v in attrs
                    ):
                        self.in_result = True

                def handle_data(self, data):
                    if self.in_result and data.strip():
                        self.current_text += data.strip() + " "

                def handle_endtag(self, tag):
                    if tag == 'a' and self.in_result:
                        if self.current_text:
                            self.results.append(self.current_text.strip()[:120])
                        self.current_text = ""
                        self.in_result = False

            parser = DDGParser()
            parser.feed(response.text)
            result = "\n".join(f"• {r}" for r in parser.results[:2]) if parser.results else "No results"
            logger.info("[SEARCH] ✓")
            return result, "DuckDuckGo"
    except requests.RequestException as e:
        # Network failures are expected (the timeout is deliberately tight);
        # log them instead of the previous bare `except: pass`, which
        # silently swallowed *everything*, including KeyboardInterrupt.
        logger.warning("[SEARCH] Request failed: %s", e)
    except Exception as e:
        # Defensive: malformed HTML should degrade to "no results", not crash.
        logger.warning("[SEARCH] Parse failed: %s", e)
    return "No search results", "None"
|
|
|
|
|
def generate_answer(text_input):
    """Answer *text_input* with SmolLM2, grounded in DuckDuckGo results.

    Pipeline: validate input -> web search (search_parallel) -> build a
    ChatML prompt -> sampled generation -> return the answer text with a
    "**Source:**" footer naming the search engine used.

    Always returns a plain string; unexpected failures are reported as
    ``"Error: <message>"`` rather than raised, because this function is
    wired directly to Gradio and the HTTP endpoints.
    """
    # Validate BEFORE logging: the previous version sliced text_input[:60]
    # in the log call first, which raised an uncaught TypeError when
    # text_input was None.
    if not text_input or not text_input.strip():
        return "No input provided"

    logger.info("[AI] Question: %s...", text_input[:60])

    try:
        current_date = datetime.now().strftime("%B %d, %Y")

        search_start = time.time()
        search_results, search_engine = search_parallel(text_input)
        logger.info("[AI] Search: %.2fs", time.time() - search_start)

        messages = [
            {"role": "system", "content": f"Today is {current_date}. Answer briefly using search results (60-80 words)."},
            {"role": "user", "content": f"Search:\n{search_results}\n\nQ: {text_input}\nA:"}
        ]

        # Hand-built ChatML prompt for SmolLM2. NOTE(review):
        # tokenizer.apply_chat_template is the canonical way to build this —
        # confirm it produces identical tokens before switching.
        prompt = f"<|im_start|>system\n{messages[0]['content']}<|im_end|>\n<|im_start|>user\n{messages[1]['content']}<|im_end|>\n<|im_start|>assistant\n"

        gen_start = time.time()
        # Truncate long prompts so noisy search results cannot overflow the
        # context window.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=800)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=80,           # keeps answers near the 60-80 word target
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                top_k=40,
                repetition_penalty=1.15,     # small models loop without this
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens, skipping the echoed prompt.
        answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
        logger.info("[AI] Gen: %.2fs | ✓", time.time() - gen_start)

        return f"{answer}\n\n**Source:** {search_engine}"

    except Exception as e:
        logger.error("[AI] Error: %s", e)
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
# FastAPI app hosting the JSON API endpoints; the Gradio UI is mounted onto
# the same app further down.
app = FastAPI()
|
|
|
|
|
@app.post("/api/ai")
async def api_ai_post(request: Request):
    """AI endpoint - POST with a JSON body like ``{"text": "<question>"}``.

    Returns ``{"answer": ...}`` on success; ``{"error": ...}`` with status
    400 for client mistakes (empty body, invalid JSON, non-object JSON,
    missing "text") or 500 for unexpected server failures.
    """
    try:
        body = await request.body()
        logger.info("[API AI POST] Raw body: %s", body)

        if not body:
            return JSONResponse({"error": "Empty request body"}, status_code=400)

        try:
            # Starlette caches the body read above, so this does not
            # re-read the request stream.
            data = await request.json()
        except Exception as e:
            logger.error("[API AI POST] JSON parse error: %s", e)
            return JSONResponse({"error": f"Invalid JSON: {str(e)}"}, status_code=400)

        logger.info("[API AI POST] Parsed data: %s", data)

        # Valid JSON that is not an object (e.g. a list or a bare string)
        # previously hit data.get with AttributeError and surfaced as a
        # 500; report it as the client error it is.
        if not isinstance(data, dict):
            return JSONResponse({"error": "JSON body must be an object"}, status_code=400)

        question = data.get("text", "")
        if not question:
            return JSONResponse({"error": "No 'text' field in JSON"}, status_code=400)

        answer = generate_answer(question)
        return JSONResponse({"answer": answer})

    except Exception as e:
        logger.error("[API AI POST] Error: %s", e)
        return JSONResponse({"error": str(e)}, status_code=500)
|
|
|
|
|
@app.get("/api/ai")
async def api_ai_get(text: str = Query(..., description="Question text")):
    """AI endpoint - GET variant taking the question as a query parameter.

    Fallback for clients (e.g. Pluely on Windows) that cannot POST JSON.
    Responds with ``{"answer": ...}`` on success, ``{"error": ...}`` plus a
    400 for a missing question or a 500 on unexpected failure.
    """
    try:
        logger.info(f"[API AI GET] Question: {text}")

        # Guard: an empty query string yields an immediate client error.
        if not text:
            error_payload = {"error": "No text parameter"}
            return JSONResponse(error_payload, status_code=400)

        result = generate_answer(text)
        return JSONResponse({"answer": result})

    except Exception as e:
        logger.error(f"[API AI GET] Error: {str(e)}")
        return JSONResponse({"error": str(e)}, status_code=500)
|
|
|
|
|
@app.get("/health")
async def health():
    """Liveness probe: report service status and the loaded model name."""
    payload = {"status": "ok", "model": "SmolLM2-360M"}
    return payload
|
|
|
|
|
|
|
|
# --- Gradio UI ---------------------------------------------------------------
# Landing page: usage documentation for the Pluely client plus a manual
# test tab wired straight to generate_answer().
with gr.Blocks(title="Fast Q&A") as demo:
    gr.Markdown("""
# ⚡ Ultra-Fast Q&A - SmolLM2-360M

## 🎯 Pluely Configuration

### Option 1: GET with Query Param (EASIEST - Windows Compatible)
```
curl https://archcoder-basic-app.hf.space/api/ai?text={{TEXT}}
```
**Response Path:** `answer`

### Option 2: POST with JSON (If Option 1 doesn't work)
```
curl -X POST https://archcoder-basic-app.hf.space/api/ai -H "Content-Type: application/json" --data-binary @- << EOF
{"text":"{{TEXT}}"}
EOF
```
**Response Path:** `answer`
""")

    with gr.Tab("Test"):
        test_input = gr.Textbox(label="Question")
        test_btn = gr.Button("🚀 Test")
        test_output = gr.Textbox(label="Answer", lines=8)
        test_btn.click(fn=generate_answer, inputs=[test_input], outputs=[test_output])

# Mount the Gradio UI at "/" on the same FastAPI app that serves /api/ai,
# so one uvicorn process exposes both the UI and the JSON API.
app = gr.mount_gradio_app(app, demo, path="/")
|
|
|
|
|
if __name__ == "__main__":
    # Serve the combined FastAPI + Gradio app on the standard HF Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|