Update app.py
app.py CHANGED
@@ -1,121 +1,68 @@
import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
-import re  # Used for advanced cleanup

# --- Configuration ---
-# Your fine-tuned LoRA adapter
-ADAPTER_MODEL_ID = "…"
-# The base model you used for training
-BASE_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

-# 1. Load the Tokenizer
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
-tokenizer.pad_token = tokenizer.eos_token
-
-# 2. Load the Base Model
-# Use 'device_map="cpu"' to ensure it runs on the CPU instance
-base_model = AutoModelForCausalLM.from_pretrained(
-    BASE_MODEL_ID,
-    device_map="cpu",
-    torch_dtype=torch.float32,
-)
-
-# 3. Load the LoRA Adapters onto the Base Model
-model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)
-model = model.eval()  # Set to evaluation mode
-
-print(f"✅ Model loaded successfully from {ADAPTER_MODEL_ID}!")

# ------------------------------------------------------------------
-# 💡 EXPLICIT API CALL FUNCTION
# ------------------------------------------------------------------
-def call_model_api(prompt, model, tokenizer):
    """
-    …
    """
-    …
-    outputs = model.generate(
-        …
-        pad_token_id=tokenizer.eos_token_id,
-        eos_token_id=tokenizer.eos_token_id
    )
-    …

# --- Inference Function (The Main App Logic) ---

def generate_response(message, history):
-    # 1. Format the prompt
-    prompt = chat_template.format(SYSTEM_PROMPT, message)
-
-    # 2. Call the Model (Simulated API Call)
-    response = call_model_api(prompt, model, tokenizer)
-
-    # ------------------------------------------------------------------
-    # CLEANUP LOGIC: (Aggressively filter the raw API response)
-    # ------------------------------------------------------------------
-
-    # 1. Remove the initial prompt tokens
-    assistant_prefix = "<|assistant|>\n"
-    if assistant_prefix in response:
-        response = response.split(assistant_prefix, 1)[1]
-
-    # 2. Define all known junk tokens for truncation
-    junk_tokens = [
-        "</s>", "<|user|>", "<|system|>", "\n\n", "User:",
-        "\\tag{", "\\end{align*}", "\\begin{align*}", "\\text{", "\\frac{", "\\pi",
-        "\\end{align*}", "\\tag{", "\\end{align*}", "\\tag{",
-        "\\tag{1}", "\\tag{2}", "\\tag{3}", "\\tag{4}"
-    ]
-
-    # 3. Truncate at the first sight of any junk token
-    for token in junk_tokens:
-        if token in response:
-            response = response.split(token, 1)[0]
-
-    # 4. Aggressive Numerical Extraction (Final resort)
-    # Tries to extract a number followed by optional units.
-    match = re.search(r'([\-]?\d+(\.\d+)?) ?([\w\^]*\s*cm\^?2?)', response, re.IGNORECASE)
-
-    if match:
-        extracted_answer = f"{match.group(1)} {match.group(3).strip()}"
-        return extracted_answer.strip()

-    # 5. Fallback: extract any bare number
-    match_number = re.search(…)
-    if match_number:
-        return match_number.group(1).strip()

-    # Otherwise return the raw response
-    return response

# --- Gradio Interface ---

# We use gr.ChatInterface for a standard chatbot layout
demo = gr.ChatInterface(
    fn=generate_response,
-    title=f"…",
-    description="Ask a math problem!",
)

if __name__ == "__main__":
import gradio as gr
+import openai
+import os  # To securely load the API key
+# No need to import torch, transformers, or peft for the external API call

# --- Configuration ---
+# Your system prompt is now an instruction for the external model
+SYSTEM_PROMPT = "You are a highly accurate math solver. Provide the final numerical answer to the user's problem. Use the required units (e.g., '40 cm^2') and round to two decimal places if needed. Do not show your work, steps, or formulas."

+# Initialize the OpenAI client using the environment variable
+try:
+    # This automatically looks for the OPENAI_API_KEY environment variable
+    client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+except Exception as e:
+    print(f"Error initializing OpenAI client: {e}")
+    # Fallback for local testing if the key is not set as an environment variable
+    client = None

# ------------------------------------------------------------------
+# 💡 EXPLICIT EXTERNAL API CALL FUNCTION
# ------------------------------------------------------------------
+def call_external_api(prompt):
    """
+    Calls the external OpenAI API to get the model's response.
    """
+    if not client:
+        return "Error: API key not configured. Please set the OPENAI_API_KEY environment variable."
+
+    try:
+        # Call the chat completions API
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",  # A fast and capable model
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user", "content": prompt}
+            ],
+            temperature=0.0  # Set to 0.0 for deterministic, accurate math answers
        )

+        # Extract the text content from the response
+        return response.choices[0].message.content.strip()
+
+    except Exception as e:
+        return f"API Call Error: Could not get a response from the external model. Details: {e}"

# --- Inference Function (The Main App Logic) ---

def generate_response(message, history):
+    # We pass the user's message directly to the external API;
+    # the system prompt is already defined in the API call

+    # 1. Call the External Model (The API Call)
+    # The external model handles all the complex generation and cleanup internally
+    response = call_external_api(message)

+    # 2. Return the clean response
+    return response

# --- Gradio Interface ---

# We use gr.ChatInterface for a standard chatbot layout
demo = gr.ChatInterface(
    fn=generate_response,
+    title="Reliable Math LLM (Powered by External API)",
+    description="Ask a math problem! This uses a reliable external service for answers.",
)

if __name__ == "__main__":
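A quick way to sanity-check the new code path before pushing is to run the same client pattern outside Gradio. The snippet below is a minimal sketch, not part of the commit: it assumes the v1 `openai` SDK (`pip install openai`) and an `OPENAI_API_KEY` already exported in the environment, and the file name, example problem, and expected answer are illustrative only.

```python
# smoke_test.py — hypothetical helper, not in the repository.
# Assumes openai>=1.0 and OPENAI_API_KEY set in the environment.
import os
import openai

client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Mirrors call_external_api: system instruction + user problem, temperature 0.0
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "Answer with only the final number and units."},
        {"role": "user", "content": "What is the area of an 8 cm by 5 cm rectangle?"},
    ],
    temperature=0.0,
)
print(response.choices[0].message.content.strip())  # e.g. "40 cm^2"
```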
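Note that the new `generate_response` ignores the `history` argument Gradio passes it, so every question is answered in isolation. If multi-turn context ever matters, a variant along these lines could fold the history into the `messages` list. This is a sketch against the module-level `client` and `SYSTEM_PROMPT` defined above; `generate_response_with_history` is a hypothetical name, and it assumes Gradio's default history format of `(user, assistant)` pairs.

```python
# Hypothetical variant: forward Gradio's chat history to the API.
# Assumes `history` is a list of (user_message, assistant_message) pairs,
# the format gr.ChatInterface passes by default.
def generate_response_with_history(message, history):
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0.0,
    )
    return response.choices[0].message.content.strip()
```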
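The diff's last context line is the `if __name__ == "__main__":` guard; whatever follows it sits outside the hunk and is unchanged by this commit. The conventional Gradio entry point looks like the sketch below, which is the usual pattern rather than a claim about the file's exact contents.

```python
# Typical Gradio entry point (sketch; the actual body lies outside this diff hunk).
if __name__ == "__main__":
    demo.launch()  # serves the ChatInterface; on Spaces this is the app entry point
```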