nixaut-codelabs committed on
Commit
821b5d0
·
verified ·
1 Parent(s): 5240a51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -38
app.py CHANGED
@@ -1,12 +1,13 @@
1
  from fastapi import FastAPI, HTTPException, Depends, status
2
  from fastapi.security import HTTPBearer
3
  from pydantic import BaseModel
4
- from transformers import pipeline
5
  import gradio as gr
6
  import os
7
  from dotenv import load_dotenv
8
  import uvicorn
9
  import threading
 
10
 
11
  load_dotenv()
12
 
@@ -17,7 +18,20 @@ API_KEY = os.getenv("API_KEY")
17
  if not API_KEY:
18
  raise ValueError("API_KEY not found in environment variables")
19
 
20
- pipe = pipeline("text-generation", model="unsloth/gemma-3-270m-it-GGUF", model_kwargs={"gguf_file": "gemma-3-270m-it-UD-Q8_K_XL.gguf"})
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def load_system_prompt():
23
  try:
@@ -44,21 +58,25 @@ def verify_api_key(credentials = Depends(security)):
44
 
45
  @app.post("/enhance", response_model=EnhanceResponse)
46
  async def enhance_prompt(request: EnhanceRequest, api_key: str = Depends(verify_api_key)):
47
- messages = [
48
- {"role": "system", "content": SYSTEM_PROMPT},
49
- {"role": "user", "content": request.prompt}
50
- ]
51
 
52
  try:
53
- result = pipe(messages, max_new_tokens=256, temperature=0.7, do_sample=True)
54
- enhanced_prompt = result[0]["generated_text"]
 
 
 
 
 
 
 
55
 
56
- if isinstance(enhanced_prompt, list):
57
- user_message = next((msg["content"] for msg in enhanced_prompt if msg["role"] == "assistant"), enhanced_prompt[-1]["content"])
58
- else:
59
- user_message = enhanced_prompt.split("assistant")[-1].strip() if "assistant" in enhanced_prompt else enhanced_prompt
60
 
61
- return EnhanceResponse(enhanced_prompt=user_message)
 
 
 
62
  except Exception as e:
63
  raise HTTPException(status_code=500, detail=f"Enhancement failed: {str(e)}")
64
 
@@ -72,41 +90,25 @@ def enhance_for_gradio(prompt_text, api_key):
72
  if api_key != API_KEY:
73
  return "Invalid API key."
74
 
75
- # Gemma model formatı
76
- full_prompt = f"""<start_of_turn>user
77
- {SYSTEM_PROMPT}
78
-
79
- {prompt_text}<end_of_turn>
80
- <start_of_turn>model
81
- """
82
 
83
  try:
84
- result = pipe(
85
  full_prompt,
86
- max_new_tokens=1024,
87
  temperature=0.7,
88
- do_sample=True,
89
- stop_strings=["<end_of_turn>"]
 
 
90
  )
91
 
92
- response = result[0]["generated_text"]
93
- print("=== RAW OUTPUT ===")
94
- print(response)
95
- print("=== END ===")
96
-
97
- # Yanıtı temizle
98
- if full_prompt in response:
99
- enhanced_prompt = response[len(full_prompt):].strip()
100
- else:
101
- enhanced_prompt = response.replace(full_prompt, "").strip()
102
-
103
- enhanced_prompt = enhanced_prompt.replace("<end_of_turn>", "").strip()
104
 
105
  if not enhanced_prompt:
106
  return "Model generated empty response."
107
 
108
- return enhanced_prompt[:2000] # Çok uzunsa kısalt
109
-
110
  except Exception as e:
111
  return f"Enhancement failed: {str(e)}"
112
 
 
1
  from fastapi import FastAPI, HTTPException, Depends, status
2
  from fastapi.security import HTTPBearer
3
  from pydantic import BaseModel
4
+ from llama_cpp import Llama
5
  import gradio as gr
6
  import os
7
  from dotenv import load_dotenv
8
  import uvicorn
9
  import threading
10
+ from huggingface_hub import snapshot_download
11
 
12
  load_dotenv()
13
 
 
18
  if not API_KEY:
19
  raise ValueError("API_KEY not found in environment variables")
20
 
21
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
22
+
23
+ snapshot_download(
24
+ repo_id="unsloth/gemma-3-270m-it-GGUF",
25
+ local_dir="gemma-3-270m-it-GGUF",
26
+ allow_patterns=["*UD-Q8_K_XL*"]
27
+ )
28
+
29
+ llm = Llama(
30
+ model_path="gemma-3-270m-it-GGUF/gemma-3-270m-it-UD-Q8_K_XL.gguf",
31
+ n_ctx=4096,
32
+ n_threads=2,
33
+ n_gpu_layers=0
34
+ )
35
 
36
  def load_system_prompt():
37
  try:
 
58
 
59
  @app.post("/enhance", response_model=EnhanceResponse)
60
  async def enhance_prompt(request: EnhanceRequest, api_key: str = Depends(verify_api_key)):
61
+ full_prompt = f"<start_of_turn>user\n{SYSTEM_PROMPT}\n\n{request.prompt}<end_of_turn>\n<start_of_turn>model\n"
 
 
 
62
 
63
  try:
64
+ result = llm(
65
+ full_prompt,
66
+ max_tokens=512,
67
+ temperature=0.7,
68
+ top_k=40,
69
+ top_p=0.95,
70
+ repeat_penalty=1.1,
71
+ stop=["<end_of_turn>"]
72
+ )
73
 
74
+ enhanced_prompt = result["choices"][0]["text"].strip()
 
 
 
75
 
76
+ if not enhanced_prompt:
77
+ raise HTTPException(status_code=500, detail="Enhancement failed: Empty response")
78
+
79
+ return EnhanceResponse(enhanced_prompt=enhanced_prompt)
80
  except Exception as e:
81
  raise HTTPException(status_code=500, detail=f"Enhancement failed: {str(e)}")
82
 
 
90
  if api_key != API_KEY:
91
  return "Invalid API key."
92
 
93
+ full_prompt = f"<start_of_turn>user\n{SYSTEM_PROMPT}\n\n{prompt_text}<end_of_turn>\n<start_of_turn>model\n"
 
 
 
 
 
 
94
 
95
  try:
96
+ result = llm(
97
  full_prompt,
98
+ max_tokens=512,
99
  temperature=0.7,
100
+ top_k=40,
101
+ top_p=0.95,
102
+ repeat_penalty=1.1,
103
+ stop=["<end_of_turn>"]
104
  )
105
 
106
+ enhanced_prompt = result["choices"][0]["text"].strip()
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  if not enhanced_prompt:
109
  return "Model generated empty response."
110
 
111
+ return enhanced_prompt
 
112
  except Exception as e:
113
  return f"Enhancement failed: {str(e)}"
114