Spaces:
Running
Running
| # import ollama | |
| # # Synchronous ask (kept for caching or non-stream calls) | |
| # def ask_ollama(prompt: str, model_name: str = "llama3"): | |
| # response = ollama.chat( | |
| # model=model_name, | |
| # messages=[ | |
| # {"role": "system", "content": "You are a helpful assistant for college queries."}, | |
| # {"role": "user", "content": prompt} | |
| # ] | |
| # ) | |
| # return response.get("message", {}).get("content", "") | |
| # # Streaming generator: yields incremental text chunks | |
| # def ask_ollama_stream(prompt: str, model_name: str = "llama3"): | |
| # stream = ollama.chat( | |
| # model=model_name, | |
| # messages=[ | |
| # {"role": "system", "content": "You are a helpful assistant for college queries."}, | |
| # {"role": "user", "content": prompt} | |
| # ], | |
| # stream=True | |
| # ) | |
| # buffer = "" | |
| # for chunk in stream: | |
| # # chunk may contain partial content; combine | |
| # text = chunk.get("message", {}).get("content", "") | |
| # if text: | |
| # # yield incremental text (could be full or partial) | |
| # yield text | |
import os

import google.generativeai as genai
from dotenv import load_dotenv

# Load environment variables from a local .env file into os.environ.
load_dotenv()

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Missing GOOGLE_API_KEY in .env")

# Security fix: never echo API-key material to stdout/logs (the previous
# code printed the first 15 characters of the key). Confirm presence only.
print("GOOGLE_API_KEY loaded.")

# Initialize the Google Gemini client once at import time.
genai.configure(api_key=GOOGLE_API_KEY)

SYSTEM_PROMPT = "You are a helpful academic assistant for IFHE University. Use only the provided context."
# Non-streaming query. NOTE(review): the name "ask_ollama" is kept for
# backward compatibility with existing callers, but the backend is Gemini.
def ask_ollama(prompt: str):
    """Return one complete answer for *prompt* from the first working model.

    Tries a list of Gemini models in preference order and returns the first
    non-empty, stripped response text.

    Args:
        prompt: The user question (context is expected to be embedded in it).

    Returns:
        The model's answer, or a human-readable warning string on failure
        (this function never raises, matching the original contract).
    """
    models_to_try = [
        "gemini-2.5-flash",
        "gemini-2.5-pro",
        "gemini-pro",
    ]
    last_error = None
    for model_name in models_to_try:
        try:
            print(f"Trying model: {model_name}")
            model = genai.GenerativeModel(
                model_name=model_name,
                system_instruction=SYSTEM_PROMPT,
            )
            response = model.generate_content(prompt)
            # response.text raises ValueError when generation was blocked;
            # the except clause below records that as the failure reason.
            text = response.text
            if text and text.strip():
                return text.strip()
            print("Empty output, trying next model...")
        except Exception as e:  # SDK raises several exception types; stay broad.
            print(f"Model failed ({model_name}):", e)
            last_error = e
    # Fix: distinguish "every model errored" from "every model returned
    # empty output" — previously the latter reported "Last error: None".
    if last_error is None:
        return "All models returned empty output."
    return f"All models failed. Last error: {last_error}"
def ask_ollama_stream(prompt: str, model_name: str = "gemini-2.5-flash"):
    """Yield incremental text chunks for *prompt* (real-time streaming).

    Args:
        prompt: The user question to send to the model.
        model_name: Gemini model identifier to stream from.

    Yields:
        Partial response text as it arrives. On failure a single
        human-readable error string is yielded instead of raising, so the
        consumer's streaming loop never sees an exception.
    """
    # NOTE(review): this uses a shorter inline system prompt than
    # SYSTEM_PROMPT used by ask_ollama — confirm whether that is intentional.
    try:
        print(f"Connecting to Google Gemini model: {model_name}")
        model = genai.GenerativeModel(
            model_name=model_name,
            system_instruction="You are a helpful academic assistant for IFHE University.",
        )
        stream = model.generate_content(prompt, stream=True)
        for chunk in stream:
            # Fix: chunk.text raises ValueError for chunks without parts
            # (e.g. safety-blocked or final bookkeeping chunks); skip those
            # instead of aborting the whole stream via the outer except.
            try:
                text = chunk.text
            except ValueError:
                continue
            if text:
                yield text
        print("Streaming complete.")
    except Exception as e:
        print("Streaming error:", e)
        yield f"Error while streaming: {str(e)}"