# digitalicfai / llama_api.py
# NOTE: the following lines were Hugging Face page chrome captured with the
# file ("Chaitu2112's picture", "Update llama_api.py", commit 927f093
# verified) — kept here as a comment so the module remains valid Python.
# import ollama
# # Synchronous ask (kept for caching or non-stream calls)
# def ask_ollama(prompt: str, model_name: str = "llama3"):
# response = ollama.chat(
# model=model_name,
# messages=[
# {"role": "system", "content": "You are a helpful assistant for college queries."},
# {"role": "user", "content": prompt}
# ]
# )
# return response.get("message", {}).get("content", "")
# # Streaming generator: yields incremental text chunks
# def ask_ollama_stream(prompt: str, model_name: str = "llama3"):
# stream = ollama.chat(
# model=model_name,
# messages=[
# {"role": "system", "content": "You are a helpful assistant for college queries."},
# {"role": "user", "content": prompt}
# ],
# stream=True
# )
# buffer = ""
# for chunk in stream:
# # chunk may contain partial content; combine
# text = chunk.get("message", {}).get("content", "")
# if text:
# # yield incremental text (could be full or partial)
# yield text
from dotenv import load_dotenv
import os
import google.generativeai as genai
# Load environment variables from a local .env file (no-op if the file is absent).
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("❌ Missing GOOGLE_API_KEY in .env")
# Security: never echo any portion of the secret key to stdout/logs —
# confirm presence only (the original printed the first 15 characters).
print("✅ GOOGLE_API_KEY loaded")
# Initialize Google Gemini client
genai.configure(api_key=GOOGLE_API_KEY)
# System instruction shared by the non-streaming entry point below.
SYSTEM_PROMPT = "You are a helpful academic assistant for IFHE University. Use only the provided context."
# 🧠 Non-streaming function
def ask_ollama(prompt: str) -> str:
    """Return a single (non-streaming) Gemini answer for *prompt*.

    Tries each model in ``models_to_try`` in order and returns the first
    non-empty response, stripped of surrounding whitespace.  The name is
    kept as ``ask_ollama`` for backward compatibility with callers that
    predate the switch from Ollama to Gemini.

    Returns:
        The model's answer, or an "⚠️ All models failed" message that
        includes the last recorded failure reason.
    """
    models_to_try = [
        "gemini-2.5-flash",
        "gemini-2.5-pro",
        "gemini-pro",
    ]
    last_error = None
    for model_name in models_to_try:
        try:
            print(f"🆓 Trying model: {model_name}")
            model = genai.GenerativeModel(
                model_name=model_name,
                system_instruction=SYSTEM_PROMPT,
            )
            response = model.generate_content(prompt)
            # response.text raises ValueError when the response was blocked
            # or has no candidates; the except below treats that as failure.
            text = response.text
            if text and text.strip():
                return text.strip()
            # Record a reason so an all-empty run doesn't report
            # "Last error: None" (the original left last_error unset here).
            last_error = f"empty output from {model_name}"
            print("⚠️ Empty output, trying next model...")
        except Exception as e:
            print(f"❌ Model failed ({model_name}):", e)
            last_error = e
    return f"⚠️ All models failed. Last error: {last_error}"
def ask_ollama_stream(prompt: str, model_name: str = "gemini-2.5-flash"):
    """
    Stream the Gemini response chunk-by-chunk for real-time output.

    Yields incremental text fragments as they arrive.  On a fatal error a
    single "⚠️ Error while streaming: ..." string is yielded instead of
    raising, so consumers can surface it to the user.
    Includes detailed logging for debugging.
    """
    try:
        print(f"🚀 Connecting to Google Gemini model: {model_name}")
        model = genai.GenerativeModel(
            model_name=model_name,
            system_instruction="You are a helpful academic assistant for IFHE University.",
        )
        stream = model.generate_content(prompt, stream=True)
        for chunk in stream:
            print(f"📦 Chunk received: {chunk}")
            # chunk.text raises ValueError for part-less chunks (e.g. a
            # safety block or a bare finish_reason) — skip those rather
            # than aborting the whole stream mid-answer.
            try:
                text = chunk.text
            except ValueError as skip_err:
                print("⚠️ Skipping chunk without text:", skip_err)
                continue
            if text:
                print(f"🧩 Token: {text!r}")
                yield text
        print("✅ Streaming complete.")
    except Exception as e:
        print("⚠️ Streaming error:", e)
        yield f"⚠️ Error while streaming: {str(e)}"