ALVHB95 committed on
Commit
3a7b6f0
·
1 Parent(s): 9150cfd

new model

Browse files
Files changed (3) hide show
  1. Dockerfile.txt +20 -3
  2. app.py +47 -44
  3. requirements.txt +1 -1
Dockerfile.txt CHANGED
@@ -1,11 +1,28 @@
1
  FROM python:3.10
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  WORKDIR /code
4
 
5
  COPY ./requirements.txt /code/requirements.txt
6
-
7
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
 
9
  COPY . .
10
 
11
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
1
  FROM python:3.10
2
 
3
+ # Prevent Python from writing .pyc files / enable unbuffered logs
4
+ ENV PYTHONDONTWRITEBYTECODE=1
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ # Make Gradio listen on all interfaces and on port 7860
8
+ ENV GRADIO_SERVER_NAME=0.0.0.0
9
+ ENV GRADIO_SERVER_PORT=7860
10
+
11
+ # Optional but recommended
12
+ # ENV HUGGINGFACEHUB_API_TOKEN=hf_xxx
13
+ # ENV USER_AGENT="green-greta/1.0 (+contact-or-repo)"
14
+
15
  WORKDIR /code
16
 
17
  COPY ./requirements.txt /code/requirements.txt
18
+ RUN pip install --no-cache-dir --upgrade pip && \
19
+ pip install --no-cache-dir --upgrade -r /code/requirements.txt
20
 
21
  COPY . .
22
 
23
+ # Expose Gradio port
24
+ EXPOSE 7860
25
+
26
+ # Your code calls app.launch(...) inside app.py, so just run Python.
27
+ # (Uvicorn is for FastAPI apps, which you are not using here.)
28
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -1,15 +1,13 @@
1
  """
2
  =========================================================
3
- Fixed app.py — Green Greta (Gradio + HF + LangChain)
4
  Notes:
5
- - Replaced deprecated/404-prone HuggingFaceHub call with HuggingFaceEndpoint.
6
- Option A (default below): use a readily available public model (Zephyr) via the free Inference API.
7
- Option B (commented): keep Mixtral, but then you MUST provision an Inference Endpoint (paid) and set endpoint_url.
8
- - Made JSON parsing of the schema robust; no fragile string slicing.
9
- - Fixed EfficientNet input size bug (224x224, not 244x224).
10
- - Safer memory setup for ConversationalRetrievalChain; added return_messages.
11
- - Better error handling on web loads and QA call.
12
- - Minor cleanups of duplicate imports, warnings, and defaults.
13
  =========================================================
14
  """
15
 
@@ -19,21 +17,22 @@ import shutil
19
 
20
  import gradio as gr
21
  import tensorflow as tf
22
- from tensorflow import keras
23
  from PIL import Image
24
 
25
  import tenacity # for retrying failed requests
26
  from fake_useragent import UserAgent
27
 
28
- # LangChain
29
- from langchain.text_splitter import RecursiveCharacterTextSplitter
30
- from langchain.embeddings import HuggingFaceEmbeddings
31
- from langchain.prompts import ChatPromptTemplate
32
- from langchain.output_parsers import PydanticOutputParser
33
- from langchain.chains import ConversationalRetrievalChain
34
- from langchain.memory import ConversationBufferMemory
35
  from langchain_community.document_loaders import WebBaseLoader
36
  from langchain_community.llms import HuggingFaceEndpoint
 
 
 
 
37
  from pydantic.v1 import BaseModel, Field
38
 
39
  # Theming
@@ -54,7 +53,7 @@ from huggingface_hub import from_pretrained_keras
54
  model1 = from_pretrained_keras("rocioadlc/efficientnetB0_trash")
55
 
56
  # Define class labels for the trash classification
57
- class_labels = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
58
 
59
 
60
  def predict_image(input_image: Image.Image):
@@ -71,16 +70,15 @@ def predict_image(input_image: Image.Image):
71
 
72
  predictions = model1.predict(image_array)
73
  probs = predictions[0].tolist()
74
-
75
  return {label: float(probs[i]) for i, label in enumerate(class_labels)}
76
 
77
 
78
  image_gradio_app = gr.Interface(
79
  fn=predict_image,
80
- inputs=gr.Image(label="Image", sources=['upload', 'webcam'], type="pil"),
81
  outputs=[gr.Label(label="Result")],
82
  title="<span style='color: rgb(243, 239, 224);'>Green Greta</span>",
83
- theme=theme
84
  )
85
 
86
  """
@@ -106,6 +104,7 @@ def safe_load_all_urls(urls):
106
  docs = load_url(link)
107
  all_docs.extend(docs)
108
  except Exception as e:
 
109
  print(f"Skipping URL due to error: {link}\nError: {e}\n")
110
  return all_docs
111
 
@@ -121,16 +120,15 @@ text_splitter = RecursiveCharacterTextSplitter(
121
  docs = text_splitter.split_documents(all_loaded_docs)
122
 
123
  # Small + high-quality general embedding
124
- embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
125
 
126
- persist_directory = 'docs/chroma/'
127
  shutil.rmtree(persist_directory, ignore_errors=True)
128
 
129
- from langchain.vectorstores import Chroma
130
  vectordb = Chroma.from_documents(
131
  documents=docs,
132
  embedding=embeddings,
133
- persist_directory=persist_directory
134
  )
135
 
136
  retriever = vectordb.as_retriever(search_kwargs={"k": 3}, search_type="mmr")
@@ -145,6 +143,7 @@ class FinalAnswer(BaseModel):
145
  question: str = Field(description="User question")
146
  answer: str = Field(description="Direct answer")
147
 
 
148
  parser = PydanticOutputParser(pydantic_object=FinalAnswer)
149
 
150
  SYSTEM_TEMPLATE = (
@@ -169,14 +168,12 @@ qa_prompt = ChatPromptTemplate.from_template(
169
 
170
  """
171
  =========================================================
172
- 4) LLM SETUP (fixes the 404/deprecation issue)
173
  =========================================================
174
  """
175
  # IMPORTANT:
176
- # The previous code used `HuggingFaceHub` with repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1".
177
- # That route often 404s on the public Inference API router unless you deploy an Endpoint.
178
- # Fix: use `HuggingFaceEndpoint` with a public model that *is* available on the router,
179
- # or provision your own Inference Endpoint if you insist on Mixtral.
180
 
181
  # ---- Option A (DEFAULT): public, free router model that works out-of-the-box
182
  DEFAULT_REPO = os.environ.get("HF_REPO_ID", "HuggingFaceH4/zephyr-7b-beta")
@@ -189,11 +186,11 @@ llm = HuggingFaceEndpoint(
189
  top_k=50,
190
  repetition_penalty=1.05,
191
  do_sample=True,
192
- # Make sure your token is set in env: HUGGINGFACEHUB_API_TOKEN
193
  )
194
 
195
- # ---- Option B (MIXTRAL): requires a paid Inference Endpoint you own
196
- # MIXTRAL_ENDPOINT_URL = os.environ.get("HF_ENDPOINT_URL") # e.g. https://xyz.us-east-1.aws.endpoints.huggingface.cloud
197
  # if MIXTRAL_ENDPOINT_URL:
198
  # llm = HuggingFaceEndpoint(
199
  # endpoint_url=MIXTRAL_ENDPOINT_URL,
@@ -221,36 +218,38 @@ qa_chain = ConversationalRetrievalChain.from_llm(
221
  retriever=retriever,
222
  memory=memory,
223
  verbose=True,
224
- combine_docs_chain_kwargs={'prompt': qa_prompt},
225
  get_chat_history=lambda h: h, # memory already returns messages
226
  rephrase_question=False,
227
- output_key='output',
228
  )
229
 
230
 
231
  def chat_interface(question, history):
232
  """
233
  Processes the user's question through the qa_chain,
234
- and robustly parses the JSON output.
235
  """
236
  try:
237
- result = qa_chain.invoke({'question': question})
238
- raw = result.get('output', '').strip()
239
 
240
  # Try strict JSON first
241
  try:
242
  payload = json.loads(raw)
243
  except json.JSONDecodeError:
244
  # If the model returned extra text around JSON, try to extract the first JSON object
245
- start = raw.find('{')
246
- end = raw.rfind('}')
247
  if start != -1 and end != -1 and end > start:
248
- payload = json.loads(raw[start:end+1])
 
 
 
249
  else:
250
  payload = {"question": question, "answer": raw}
251
 
252
  # Enforce schema
253
- question_out = payload.get("question", question)
254
  answer_out = payload.get("answer", raw)
255
  return answer_out
256
 
@@ -312,5 +311,9 @@ app = gr.TabbedInterface(
312
 
313
  # Enable queue() for concurrency and launch the Gradio app
314
  app.queue()
315
- # Tip: set share=True if you want a public link
316
- app.launch(share=os.environ.get("GRADIO_SHARE", "false").lower() == "true")
 
 
 
 
 
1
  """
2
  =========================================================
3
+ Fixed app.py — Green Greta (Gradio + HF + LangChain v0.2)
4
  Notes:
5
+ - Uses HuggingFaceEndpoint with a public router model (Zephyr) by default.
6
+ - Robust JSON parsing (no fragile string slicing).
7
+ - EfficientNet input size fixed (224x224).
8
+ - LangChain v0.2 import layout (core/community/text-splitters).
9
+ - Safer memory for ConversationalRetrievalChain; better error handling.
10
+ - Gradio binds to 0.0.0.0:7860 for Docker.
 
 
11
  =========================================================
12
  """
13
 
 
17
 
18
  import gradio as gr
19
  import tensorflow as tf
 
20
  from PIL import Image
21
 
22
  import tenacity # for retrying failed requests
23
  from fake_useragent import UserAgent
24
 
25
+ # LangChain (v0.2+ layout)
26
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
27
+ from langchain_core.prompts import ChatPromptTemplate
28
+ from langchain_core.output_parsers import PydanticOutputParser
29
+ from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
30
  from langchain_community.document_loaders import WebBaseLoader
31
  from langchain_community.llms import HuggingFaceEndpoint
32
+ from langchain_community.vectorstores import Chroma
33
+ from langchain.chains import ConversationalRetrievalChain
34
+ from langchain.memory import ConversationBufferMemory
35
+
36
  from pydantic.v1 import BaseModel, Field
37
 
38
  # Theming
 
53
  model1 = from_pretrained_keras("rocioadlc/efficientnetB0_trash")
54
 
55
  # Define class labels for the trash classification
56
+ class_labels = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]
57
 
58
 
59
  def predict_image(input_image: Image.Image):
 
70
 
71
  predictions = model1.predict(image_array)
72
  probs = predictions[0].tolist()
 
73
  return {label: float(probs[i]) for i, label in enumerate(class_labels)}
74
 
75
 
76
  image_gradio_app = gr.Interface(
77
  fn=predict_image,
78
+ inputs=gr.Image(label="Image", sources=["upload", "webcam"], type="pil"),
79
  outputs=[gr.Label(label="Result")],
80
  title="<span style='color: rgb(243, 239, 224);'>Green Greta</span>",
81
+ theme=theme,
82
  )
83
 
84
  """
 
104
  docs = load_url(link)
105
  all_docs.extend(docs)
106
  except Exception as e:
107
+ # If load_url fails after all retries, skip that URL
108
  print(f"Skipping URL due to error: {link}\nError: {e}\n")
109
  return all_docs
110
 
 
120
  docs = text_splitter.split_documents(all_loaded_docs)
121
 
122
  # Small + high-quality general embedding
123
+ embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-small")
124
 
125
+ persist_directory = "docs/chroma/"
126
  shutil.rmtree(persist_directory, ignore_errors=True)
127
 
 
128
  vectordb = Chroma.from_documents(
129
  documents=docs,
130
  embedding=embeddings,
131
+ persist_directory=persist_directory,
132
  )
133
 
134
  retriever = vectordb.as_retriever(search_kwargs={"k": 3}, search_type="mmr")
 
143
  question: str = Field(description="User question")
144
  answer: str = Field(description="Direct answer")
145
 
146
+
147
  parser = PydanticOutputParser(pydantic_object=FinalAnswer)
148
 
149
  SYSTEM_TEMPLATE = (
 
168
 
169
  """
170
  =========================================================
171
+ 4) LLM SETUP (no router 404s)
172
  =========================================================
173
  """
174
  # IMPORTANT:
175
+ # The old route "mistralai/Mixtral-8x7B-Instruct-v0.1" often 404s on the public HF router.
176
+ # Use a router-available model OR your own paid Inference Endpoint.
 
 
177
 
178
  # ---- Option A (DEFAULT): public, free router model that works out-of-the-box
179
  DEFAULT_REPO = os.environ.get("HF_REPO_ID", "HuggingFaceH4/zephyr-7b-beta")
 
186
  top_k=50,
187
  repetition_penalty=1.05,
188
  do_sample=True,
189
+ # Set env: HUGGINGFACEHUB_API_TOKEN=hf_xxx
190
  )
191
 
192
+ # ---- Option B (MIXTRAL): your paid Inference Endpoint
193
+ # MIXTRAL_ENDPOINT_URL = os.environ.get("HF_ENDPOINT_URL") # e.g. https://xyz.aws.endpoints.huggingface.cloud
194
  # if MIXTRAL_ENDPOINT_URL:
195
  # llm = HuggingFaceEndpoint(
196
  # endpoint_url=MIXTRAL_ENDPOINT_URL,
 
218
  retriever=retriever,
219
  memory=memory,
220
  verbose=True,
221
+ combine_docs_chain_kwargs={"prompt": qa_prompt},
222
  get_chat_history=lambda h: h, # memory already returns messages
223
  rephrase_question=False,
224
+ output_key="output",
225
  )
226
 
227
 
228
  def chat_interface(question, history):
229
  """
230
  Processes the user's question through the qa_chain,
231
+ and robustly parses the JSON output per schema.
232
  """
233
  try:
234
+ result = qa_chain.invoke({"question": question})
235
+ raw = result.get("output", "").strip()
236
 
237
  # Try strict JSON first
238
  try:
239
  payload = json.loads(raw)
240
  except json.JSONDecodeError:
241
  # If the model returned extra text around JSON, try to extract the first JSON object
242
+ start = raw.find("{")
243
+ end = raw.rfind("}")
244
  if start != -1 and end != -1 and end > start:
245
+ try:
246
+ payload = json.loads(raw[start : end + 1])
247
+ except json.JSONDecodeError:
248
+ payload = {"question": question, "answer": raw}
249
  else:
250
  payload = {"question": question, "answer": raw}
251
 
252
  # Enforce schema
 
253
  answer_out = payload.get("answer", raw)
254
  return answer_out
255
 
 
311
 
312
  # Enable queue() for concurrency and launch the Gradio app
313
  app.queue()
314
+ # Tip: set GRADIO_SHARE=true in env if you want a public link
315
+ app.launch(
316
+ server_name="0.0.0.0",
317
+ server_port=7860,
318
+ share=os.environ.get("GRADIO_SHARE", "false").lower() == "true",
319
+ )
requirements.txt CHANGED
@@ -9,7 +9,7 @@ tensorflow==2.13.0
9
  langchain==0.2.12
10
  langchain-community==0.2.10
11
  langchain-text-splitters==0.2.2
12
- langchain-core==0.2.24
13
 
14
  # Vector store
15
  chromadb==0.5.3
 
9
  langchain==0.2.12
10
  langchain-community==0.2.10
11
  langchain-text-splitters==0.2.2
12
+ langchain-core==0.2.27
13
 
14
  # Vector store
15
  chromadb==0.5.3