"""Gradio app: audit a Hugging Face Space for on-chain revenue readiness with a locally loaded LLM."""
import os, json, requests, torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gradio as gr
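# Note: for this Space to build, requirements.txt would need at least gradio,
# transformers, torch, and requests (an assumption based only on the imports above;
# versions left unpinned).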
def fetch_file(space_id, filename):
    """Download a raw file from the Space's main branch; return "" if unavailable."""
    url = f"https://huggingface.co/spaces/{space_id}/raw/main/{filename}"
    try:
        r = requests.get(url, timeout=10)
        return r.text if r.status_code == 200 else ""
    except requests.RequestException:
        return ""
def build_prompt(readme, code, reqs):
    """Assemble an instruction prompt that asks the model for a strict JSON verdict."""
    return f"""<s>[INST] You are a protocol intelligence model. Determine if this Hugging Face Space is monetized on-chain.
Return strictly in this JSON format:
{{
"is_revenue_ready": true|false,
"confidence": float,
"blockers": [ "reason 1", "reason 2" ],
"summary": "short summary"
}}
README:
{readme}
Code:
{code}
Dependencies:
{reqs}
[/INST]
"""
def run_audit(space_id, model_id):
    """Fetch the Space's key files, run the selected model, and parse its JSON verdict."""
    readme = fetch_file(space_id, "README.md")
    code = fetch_file(space_id, "app.py")
    reqs = fetch_file(space_id, "requirements.txt")
    prompt = build_prompt(readme, code, reqs)
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
        # return_full_text=False keeps the echoed prompt (which itself contains "{")
        # out of the output, so the JSON extraction below sees only the completion.
        result = pipe(prompt, return_full_text=False)[0]["generated_text"]
    except Exception as e:
        return {"error": f"Model load failed: {str(e)}"}
    try:
        # Extract everything between the first "{" and the last "}" and parse it as JSON.
        blob = result.split("{", 1)[1].rsplit("}", 1)[0]
        js = json.loads("{" + blob + "}")
        js["space_id"] = space_id
        return js
    except Exception as e:
        return {"error": f"Output parse failed: {str(e)}", "raw": result}
def batch_audit():
    """CLI helper: audit every Space listed in space_list.txt and write one JSON report per Space."""
    spaces = open("space_list.txt").read().splitlines()
    os.makedirs("out/unified_audit", exist_ok=True)
    model_id = "mistralai/Mistral-7B-Instruct-v0.1"
    for sid in spaces:
        result = run_audit(sid, model_id)
        with open(f"out/unified_audit/{sid.replace('/', '__')}.json", "w") as f:
            json.dump(result, f, indent=2)
        print(f"✅ {sid}: {result.get('summary', result)}")
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 HF Space Revenue Readiness Auditor (33x LLMs, No API Keys)")
    sid = gr.Textbox(label="Space ID (e.g. username/space-name)")
    model = gr.Dropdown(
        label="Select LLM Model",
        choices=[
            "mistralai/Mistral-7B-Instruct-v0.1",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "google/gemma-2b-it",
            "microsoft/phi-2",
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "NousResearch/Nous-Capybara-7B-V1",
            "HuggingFaceH4/zephyr-7b-alpha",
            "intel/neural-chat-7b-v3",
            "tiiuae/falcon-rw-1b",
            "EleutherAI/pythia-1.4b",
            "EleutherAI/pythia-2.8b",
            "Open-Orca/Mistral-7B-OpenOrca",
            # Extend to the full 33 here
        ],
        value="mistralai/Mistral-7B-Instruct-v0.1",
    )
    run = gr.Button("Run Audit")
    output = gr.JSON(label="Audit Result")
    run.click(fn=run_audit, inputs=[sid, model], outputs=output)

# Uncomment to run CLI batch:
# batch_audit()

demo.launch()