ranggafermata committed on
Commit
bb193a1
Β·
verified Β·
1 Parent(s): 6bd7f6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -41
app.py CHANGED
@@ -1,48 +1,107 @@
 
 
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
- import torch
4
  from huggingface_hub import login
5
  import os
6
 
7
  login(token=os.getenv("HF_TOKEN"))
8
 
9
- # Load Mistral-7B-Instruct base model
10
- tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
11
- model = AutoModelForCausalLM.from_pretrained(
12
- "mistralai/Mistral-7B-Instruct-v0.2",
13
- device_map="auto",
14
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
15
- )
16
-
17
- # Prompt wrapper with Fermata-Light persona
18
- def wrap_prompt(user_input):
19
- return f"""[INST] You are **Fermata-Light** one of the Fermata model but you are the light version, a fast, casual, and friendly AI assistant. You speak with clarity and confidence. Be witty, concise, and helpful. And remember Rangga Fermata is a guy behind this model.
20
- {user_input} [/INST]"""
21
-
22
- # Chat function
23
- def chat(user_input):
24
- prompt = wrap_prompt(user_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
26
- outputs = model.generate(
27
- **inputs,
28
- max_new_tokens=256,
29
- temperature=0.7,
30
- top_p=0.9,
31
- do_sample=True,
32
- repetition_penalty=1.1
33
- )
34
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
35
- return response.split("[/INST]")[-1].strip()
36
-
37
- # Gradio UI
38
- gui = gr.Interface(
39
- fn=chat,
40
- inputs=gr.Textbox(lines=5, label="Ask Fermata-Light"),
41
- outputs=gr.Textbox(label="Fermata-Light Says:"),
42
- title="🧠 Fermata-Light (Mistral-7B Prompt-Powered)",
43
- description="A fast, witty assistant powered by Mistral-7B and prompt-engineered with personality."
44
- )
45
-
46
- # Launch
47
- if __name__ == "__main__":
48
- gui.launch()
 
1
import json
import os
import re
import urllib.parse
import urllib.request

import gradio as gr
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from huggingface_hub import login
8
 
9
  login(token=os.getenv("HF_TOKEN"))
10
 
11
+ from transformers import AutoTokenizer, AutoModelForCausalLM
12
+ import torch
13
+
14
+ model_id = "ranggafermata/Fermata"
15
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
16
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32)
17
+
18
+ app = FastAPI()
19
+
20
def chat_function(message):
    """Tokenize *message*, generate up to 128 new tokens, and decode the result."""
    encoded = tokenizer(message, return_tensors="pt").to(model.device)
    generated = model.generate(**encoded, max_new_tokens=128)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
24
+
25
@app.post("/chat")
async def chat_api(request: Request):
    """POST /chat: run the model on the JSON body's "input" field.

    Returns {"output": ...} on success, a 400 when "input" is missing or
    empty, and a 500 with the error message on any other failure.
    """
    try:
        body = await request.json()
        prompt = body.get("input", "")
        if not prompt:
            return JSONResponse(content={"error": "Missing input"}, status_code=400)
        return JSONResponse(content={"output": chat_function(prompt)})
    except Exception as e:
        # Top-level API boundary: surface any failure as a JSON 500 payload.
        return JSONResponse(content={"error": str(e)}, status_code=500)
36
+
37
+ # Weather API
38
+ def get_weather(location):
39
+ key = os.getenv("OPENWEATHER_API_KEY")
40
+ if not key:
41
+ return "Missing API key for weather."
42
+ try:
43
+ url = f"http://api.openweathermap.org/data/2.5/weather?q={location}&appid={key}&units=metric"
44
+ r = requests.get(url).json()
45
+ return f"{r['name']}: {r['main']['temp']}Β°C, {r['weather'][0]['description']}"
46
+ except:
47
+ return "Failed to fetch weather."
48
+
49
+ # NASA API
50
+ def get_apod():
51
+ key = os.getenv("NASA_API_KEY")
52
+ if not key:
53
+ return "Missing API key for NASA."
54
+ try:
55
+ r = requests.get(f"https://api.nasa.gov/planetary/apod?api_key={key}").json()
56
+ return f"πŸ“· {r['title']}\n\n{r['explanation']}\n\nMedia: {r['url']}"
57
+ except:
58
+ return "Failed to fetch NASA APOD."
59
+
60
+ # Parse tool call JSON inside [TOOL_CALL] {...}
61
+ def parse_tool_call(output):
62
+ if not output or "[TOOL_CALL]" not in output:
63
+ return None
64
+
65
+ match = re.search(r"\[TOOL_CALL\]\s*(\{.*?\})", output, re.DOTALL)
66
+ if not match:
67
+ return None
68
+
69
+ json_str = match.group(1).strip()
70
+ if not json_str or json_str in ["null", "None", ""]:
71
+ return None
72
+
73
+ try:
74
+ return json.loads(json_str)
75
+ except json.JSONDecodeError as e:
76
+ print(f"❌ JSON parsing failed: {e}")
77
+ print(f"⚠️ Bad JSON string: {json_str}")
78
+ return None
79
+
80
+
81
+ # Chat logic
82
+ def respond(message):
83
+ prompt = f"### Human:\n{message}\n\n### Assistant:"
84
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
85
+ outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.7, do_sample=True)
86
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
87
+ reply = result.split("### Assistant:")[-1].strip()
88
+
89
+
90
+ tool = parse_tool_call(reply)
91
+ if tool:
92
+ if tool["tool"] == "get_weather":
93
+ return get_weather(tool.get("location", "Unknown"))
94
+ elif tool["tool"] == "get_apod":
95
+ return get_apod()
96
+ else:
97
+ return f"Tool not recognized: {tool['tool']}"
98
+ return reply
99
+
100
+ # UI
101
# UI
# NOTE(review): the description says "Gemma 3" but model_id above is
# "ranggafermata/Fermata" — confirm the base model before shipping the copy.
demo = gr.Interface(
    fn=respond,
    inputs=gr.Textbox(lines=2, placeholder="Ask me something..."),
    outputs="text",
    title="Fermata AI 1.0",
    description="Now powered by the official Gemma 3 model. Ask about the weather or NASA's daily space image!",
)
demo.launch()