Spaces:

Bob-Potato
/

MetaGPT_AI

Runtime error

App Files Files Community

Bob-Potato committed on Oct 3, 2025

Commit

bcd3bd1

verified ·

1 Parent(s): 8baadf1

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -35

app.py CHANGED Viewed

@@ -1,22 +1,24 @@
 #!/usr/bin/env python3
 """
-Hugging Face Space app for Article Q&A AI.
-Robust version supporting JSON with different key capitalizations.
 """
 import os
 import json
 import faiss
 import numpy as np
-import gradio as gr
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 # ---- Config ----
 CHUNK_SIZE = 500
 CHUNK_OVERLAP = 100
-JSON_FILE = "articles.json"  # relative to WORKDIR
-TOP_K = 4
 SERVER_PORT = 7860
 # ---- Global variables ----
@@ -26,7 +28,7 @@ INDEX_DIM = None
 # ---- Models ----
 embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-gen_model_name = "google/flan-t5-small"
 tokenizer = AutoTokenizer.from_pretrained(gen_model_name)
 gen_model = AutoModelForSeq2SeqLM.from_pretrained(gen_model_name)
 gen_pipeline = pipeline(
@@ -48,17 +50,10 @@ def chunk_text(text):
     return chunks
 def build_index_in_memory():
-    """Build FAISS index in memory and return index, metadata, dim"""
     print("🚀 Building FAISS index...")
-    print("Current WORKDIR:", os.getcwd())
-    print("Files in WORKDIR:", os.listdir("."))
-    print("Looking for articles.json:", JSON_FILE)
-    print("Exists?", os.path.exists(JSON_FILE))
     if not os.path.exists(JSON_FILE):
         print("❌ articles.json missing")
         return None, None, None
     try:
         with open(JSON_FILE, "r", encoding="utf-8") as f:
             articles = json.load(f)
@@ -67,24 +62,20 @@ def build_index_in_memory():
         return None, None, None
     if not articles:
-        print("❌ articles.json is empty")
         return None, None, None
     embeddings_list, texts, metas = [], [], []
     for art_id, art in enumerate(articles):
-        # Support both lowercase and capitalized keys
         content = art.get("Continut") or art.get("continut") or ""
         url = art.get("URL") or art.get("url") or ""
         title = art.get("Titlu") or art.get("titlu") or f"articol_{art_id}"
         if not content.strip():
             continue
         chunks = chunk_text(content)
-        if len(chunks) == 0:
             continue
         embs = embed_model.encode(chunks, convert_to_numpy=True)
         if embs.ndim == 1:
             embs = embs.reshape(1, -1)
@@ -92,7 +83,7 @@ def build_index_in_memory():
         texts.extend(chunks)
         metas.extend([{"source": title, "url": url, "chunk_id": i} for i in range(len(chunks))])
-    if len(embeddings_list) == 0:
         print("❌ No valid chunks found")
         return None, None, None
@@ -101,14 +92,12 @@ def build_index_in_memory():
     d = embeddings.shape[1]
     index = faiss.IndexFlatIP(d)
     index.add(embeddings)
     metadata = {"texts": texts, "metas": metas}
     print(f"✅ Index built with {len(texts)} chunks")
     return index, metadata, d
-def ask_question(question, top_k=TOP_K, max_answer_tokens=256):
     global INDEX, METADATA, INDEX_DIM
     if not question.strip():
         return "⚠️ Please provide a question."
@@ -121,7 +110,6 @@ def ask_question(question, top_k=TOP_K, max_answer_tokens=256):
     if q_emb.ndim == 1:
         q_emb = q_emb.reshape(1, -1)
-    # Rebuild index if embedding dimension mismatch
     if INDEX_DIM is None or q_emb.shape[1] != INDEX_DIM:
         INDEX, METADATA, INDEX_DIM = build_index_in_memory()
         if INDEX is None or q_emb.shape[1] != INDEX_DIM:
@@ -154,18 +142,17 @@ def ask_question(question, top_k=TOP_K, max_answer_tokens=256):
     return f"{out} Find out more at {', '.join([u for u in urls if u])}"
-def main():
-    print("🚀 Starting Article Q&A AI...")
-    print(f"📁 Looking for articles.json at {JSON_FILE}")
-    iface = gr.Interface(
-        fn=ask_question,
-        inputs=[gr.Textbox(label="Întrebare")],
-        outputs=[gr.Textbox(label="Răspuns")],
-        live=False,
-    )
-    iface.launch(server_name="0.0.0.0", server_port=SERVER_PORT)
 if __name__ == "__main__":
-    main()

 #!/usr/bin/env python3
 """
+HF Space API for Article Q&A AI.
+Optimized for CPU / Free Tier.
+Uses tiny-flan-t5 for faster generation.
 """
 import os
 import json
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+from fastapi import FastAPI
+from pydantic import BaseModel
 # ---- Config ----
 CHUNK_SIZE = 500
 CHUNK_OVERLAP = 100
+JSON_FILE = "articles.json"
+TOP_K = 3  # fewer chunks for speed
 SERVER_PORT = 7860
 # ---- Global variables ----
 # ---- Models ----
 embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+gen_model_name = "sshleifer/tiny-flan-t5"
 tokenizer = AutoTokenizer.from_pretrained(gen_model_name)
 gen_model = AutoModelForSeq2SeqLM.from_pretrained(gen_model_name)
 gen_pipeline = pipeline(
     return chunks
 def build_index_in_memory():
     print("🚀 Building FAISS index...")
     if not os.path.exists(JSON_FILE):
         print("❌ articles.json missing")
         return None, None, None
     try:
         with open(JSON_FILE, "r", encoding="utf-8") as f:
             articles = json.load(f)
         return None, None, None
     if not articles:
+        print("❌ articles.json empty")
         return None, None, None
     embeddings_list, texts, metas = [], [], []
     for art_id, art in enumerate(articles):
         content = art.get("Continut") or art.get("continut") or ""
         url = art.get("URL") or art.get("url") or ""
         title = art.get("Titlu") or art.get("titlu") or f"articol_{art_id}"
         if not content.strip():
             continue
         chunks = chunk_text(content)
+        if not chunks:
             continue
         embs = embed_model.encode(chunks, convert_to_numpy=True)
         if embs.ndim == 1:
             embs = embs.reshape(1, -1)
         texts.extend(chunks)
         metas.extend([{"source": title, "url": url, "chunk_id": i} for i in range(len(chunks))])
+    if not embeddings_list:
         print("❌ No valid chunks found")
         return None, None, None
     d = embeddings.shape[1]
     index = faiss.IndexFlatIP(d)
     index.add(embeddings)
     metadata = {"texts": texts, "metas": metas}
     print(f"✅ Index built with {len(texts)} chunks")
     return index, metadata, d
+def ask_question(question, top_k=TOP_K, max_answer_tokens=64):
     global INDEX, METADATA, INDEX_DIM
     if not question.strip():
         return "⚠️ Please provide a question."
     if q_emb.ndim == 1:
         q_emb = q_emb.reshape(1, -1)
     if INDEX_DIM is None or q_emb.shape[1] != INDEX_DIM:
         INDEX, METADATA, INDEX_DIM = build_index_in_memory()
         if INDEX is None or q_emb.shape[1] != INDEX_DIM:
     return f"{out} Find out more at {', '.join([u for u in urls if u])}"
+# ---- FastAPI ----
+app = FastAPI()
+class Question(BaseModel):
+    text: str
+@app.post("/ask")
+def ask(q: Question):
+    return {"answer": ask_question(q.text)}
 if __name__ == "__main__":
+    import uvicorn
+    INDEX, METADATA, INDEX_DIM = build_index_in_memory()
+    uvicorn.run(app, host="0.0.0.0", port=SERVER_PORT)