An-Egoistic-Developer-Full-Of-Knowledge committed on
Commit
22c6386
·
verified ·
1 Parent(s): effa233

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -69
app.py CHANGED
@@ -1,78 +1,33 @@
1
- import os
2
  from fastapi import FastAPI, Request
3
- from fastapi.middleware.cors import CORSMiddleware
4
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
 
6
- import os
 
7
 
8
- # ======================
9
- # βœ… Set writable cache paths
10
- # ======================
11
- custom_cache = "/app/hf_cache"
12
- os.environ["HF_HOME"] = custom_cache
13
- os.environ["TRANSFORMERS_CACHE"] = os.path.join(custom_cache, "transformers")
14
- os.environ["HF_DATASETS_CACHE"] = os.path.join(custom_cache, "datasets")
15
- os.environ["HF_HUB_CACHE"] = os.path.join(custom_cache, "hub")
16
-
17
- os.makedirs(custom_cache, exist_ok=True)
18
- os.makedirs(os.environ["TRANSFORMERS_CACHE"], exist_ok=True)
19
- os.makedirs(os.environ["HF_DATASETS_CACHE"], exist_ok=True)
20
- os.makedirs(os.environ["HF_HUB_CACHE"], exist_ok=True)
21
-
22
- print("βœ… Hugging Face cache directories set to:", custom_cache)
23
-
24
- # ============================
25
- # πŸš€ Model Setup
26
- # ============================
27
- MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
28
-
29
- print(f"πŸ”₯ Loading {MODEL_NAME} ...")
30
-
31
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
32
- model = AutoModelForCausalLM.from_pretrained(
33
- MODEL_NAME,
34
- torch_dtype="auto",
35
- device_map="auto"
36
- )
37
-
38
- generator = pipeline(
39
- "text-generation",
40
- model=model,
41
- tokenizer=tokenizer,
42
- max_new_tokens=512,
43
- temperature=0.7,
44
- top_p=0.95
45
- )
46
-
47
- # ============================
48
- # 🌐 FastAPI Setup
49
- # ============================
50
- app = FastAPI(title="Jarvis AI V2", version="1.0")
51
-
52
- app.add_middleware(
53
- CORSMiddleware,
54
- allow_origins=["*"],
55
- allow_credentials=True,
56
- allow_methods=["*"],
57
- allow_headers=["*"],
58
- )
59
 
60
  @app.get("/")
61
- def root():
62
- return {"message": "πŸ€– Jarvis AI V2 is running successfully!"}
63
 
64
- @app.post("/generate")
65
- async def generate(request: Request):
66
  data = await request.json()
67
  prompt = data.get("prompt", "")
68
- if not prompt:
69
- return {"error": "Missing prompt text."}
70
-
71
- print(f"🧠 Generating for prompt: {prompt[:50]}...")
72
-
73
- output = generator(prompt)[0]["generated_text"]
74
- return {"response": output.strip()}
75
 
76
- if __name__ == "__main__":
77
- import uvicorn
78
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from huggingface_hub import InferenceClient

# Initialize the FastAPI application.
app = FastAPI()

# Instruction-tuned Gemma model, served via the Hugging Face Inference API
# (no local weights are loaded; all generation goes through `client`).
# NOTE(review): serverless Inference API availability varies per model —
# confirm this repo id is actually deployable before relying on it.
MODEL_NAME = "google/gemma-2-9b-it"
client = InferenceClient(model=MODEL_NAME)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
@app.get("/")
async def home():
    """Liveness probe: confirms the API is up and reports the active model."""
    payload = {"status": "online", "model": MODEL_NAME}
    return payload
15
 
16
@app.post("/chat")
async def chat(request: Request):
    """Generate text for a JSON payload of the form ``{"prompt": "..."}``.

    Returns:
        400 — body is not valid JSON, or the prompt is missing/empty/not a string.
        500 — the inference backend raised.
        200 — ``{"response": <generated text>}`` on success.
    """
    import asyncio  # stdlib; local import keeps the module top unchanged

    # Parse the body inside a try so a malformed payload yields a clean 400
    # instead of an unhandled exception (which FastAPI turns into a bare 500).
    try:
        data = await request.json()
    except Exception:
        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)

    # The body may legally be any JSON value; only a dict has .get().
    prompt = data.get("prompt", "") if isinstance(data, dict) else ""

    # Guard the type as well as emptiness: .strip() on a non-string
    # (e.g. a JSON number) would raise AttributeError.
    if not isinstance(prompt, str) or not prompt.strip():
        return JSONResponse({"error": "Prompt is empty"}, status_code=400)

    try:
        # text_generation is a blocking HTTP call; run it in a worker thread
        # so it does not stall the event loop for other requests.
        response = await asyncio.to_thread(
            client.text_generation,
            prompt,
            max_new_tokens=300,
            temperature=0.7,
            top_p=0.9,
        )
        return JSONResponse({"response": response})
    except Exception as e:
        # Boundary handler: surface backend failures as a structured 500.
        return JSONResponse({"error": str(e)}, status_code=500)