File size: 3,156 Bytes
bb0633d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0091ea
bb0633d
 
 
 
 
 
c0bd405
bb0633d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os, json, requests, torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gradio as gr

def fetch_file(space_id, filename):
    """Fetch one raw file from a Hugging Face Space repo.

    Args:
        space_id: Space identifier, e.g. "username/space-name".
        filename: Path of the file within the repo, e.g. "README.md".

    Returns:
        The file's text on HTTP 200, otherwise "" (missing file or any
        network failure is treated as "no content" so callers can proceed).
    """
    # BUG FIX: the URL previously hard-coded the literal string "(unknown)"
    # and ignored `filename`, so every fetch 404'd. Interpolate `filename`.
    url = f"https://huggingface.co/spaces/{space_id}/raw/main/{filename}"
    try:
        r = requests.get(url, timeout=10)
        return r.text if r.status_code == 200 else ""
    except requests.RequestException:
        # Narrowed from a bare `except:` — only network/HTTP errors are
        # best-effort-ignored; programming errors still surface.
        return ""

def build_prompt(readme, code, reqs):
    """Assemble the Mistral-style [INST] prompt asking the model to judge
    whether a Space is monetized on-chain, returning strict JSON.

    Args:
        readme: Text of the Space's README.md (may be "").
        code:   Text of the Space's app.py (may be "").
        reqs:   Text of the Space's requirements.txt (may be "").

    Returns:
        The full prompt string, with the three documents embedded verbatim.
    """
    # %-style formatting is used so the literal JSON braces in the template
    # need no escaping (unlike f-strings / str.format).
    template = """<s>[INST] You are a protocol intelligence model. Determine if this Hugging Face Space is monetized on-chain.

Return strictly in this JSON format:
{
  "is_revenue_ready": true|false,
  "confidence": float,
  "blockers": [ "reason 1", "reason 2" ],
  "summary": "short summary"
}

README:
%(readme)s

Code:
%(code)s

Dependencies:
%(reqs)s
[/INST]
"""
    return template % {"readme": readme, "code": code, "reqs": reqs}

def run_audit(space_id, model_id):
    """Audit a single Space: fetch its files, query the LLM, parse the verdict.

    Args:
        space_id: Space identifier, e.g. "username/space-name".
        model_id: Hugging Face model repo id to load for text generation.

    Returns:
        The model's parsed JSON verdict (a dict, with "space_id" added), or
        a dict with an "error" key on model-load or parse failure — this
        function never raises, by design of the callers (UI / batch loop).
    """
    readme = fetch_file(space_id, "README.md")
    code = fetch_file(space_id, "app.py")
    reqs = fetch_file(space_id, "requirements.txt")
    prompt = build_prompt(readme, code, reqs)

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # float32 keeps CPU-only Spaces working; no device placement assumed.
        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
        result = pipe(prompt)[0]["generated_text"]
    except Exception as e:
        return {"error": f"Model load failed: {str(e)}"}

    # BUG FIX: text-generation pipelines echo the prompt in generated_text,
    # and the prompt itself contains a literal {...} JSON template — so
    # searching the full text for the first "{" matched the template, not
    # the model's answer. Strip the echoed prompt before brace extraction.
    completion = result[len(prompt):] if result.startswith(prompt) else result

    try:
        # Grab everything between the first "{" and the last "}" of the
        # completion; tolerant of prose the model wraps around its JSON.
        blob = completion.split("{", 1)[1].rsplit("}", 1)[0]
        js = json.loads("{" + blob + "}")
        js["space_id"] = space_id
        return js
    except Exception as e:
        # Keep the raw text so callers can debug malformed model output.
        return {"error": f"Output parse failed: {str(e)}", "raw": result}

def batch_audit():
    """CLI helper: audit every Space listed in space_list.txt.

    Reads one space id per line (blank lines ignored), runs `run_audit`
    with a fixed default model, and writes one JSON result file per Space
    under out/unified_audit/ ("/" in the id becomes "__" in the filename).
    """
    model_id = "mistralai/Mistral-7B-Instruct-v0.1"
    os.makedirs("out/unified_audit", exist_ok=True)
    # Context manager closes the handle (was leaked via bare open().read());
    # strip/skip blank lines so trailing newlines don't produce junk audits.
    with open("space_list.txt") as f:
        spaces = [line.strip() for line in f if line.strip()]
    for sid in spaces:
        result = run_audit(sid, model_id)
        out_path = f"out/unified_audit/{sid.replace('/', '__')}.json"
        with open(out_path, "w") as f:
            json.dump(result, f, indent=2)
        print(f"✅ {sid}: {result.get('summary', result)}")

# Gradio UI: single-Space audit form. Wires the two inputs (space id +
# model choice) straight into run_audit and renders its dict as JSON.
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 HF Space Revenue Readiness Auditor (33x LLMs, No API Keys)")
    sid = gr.Textbox(label="Space ID (e.g. username/space-name)")
    # Dropdown values are passed verbatim to run_audit as `model_id`
    # (AutoTokenizer/AutoModel repo ids). Only 12 of the advertised 33
    # models are listed so far — see the trailing comment in `choices`.
    model = gr.Dropdown(
        label="Select LLM Model",
        choices=[
            "mistralai/Mistral-7B-Instruct-v0.1",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "google/gemma-2b-it",
            "microsoft/phi-2",
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "NousResearch/Nous-Capybara-7B-V1",
            "HuggingFaceH4/zephyr-7b-alpha",
            "intel/neural-chat-7b-v3",
            "tiiuae/falcon-rw-1b",
            "EleutherAI/pythia-1.4b",
            "EleutherAI/pythia-2.8b",
            "Open-Orca/Mistral-7B-OpenOrca"
            # Extend to full 33 here
        ],
        value="mistralai/Mistral-7B-Instruct-v0.1"
    )
    run = gr.Button("Run Audit")
    # run_audit returns a plain dict (either the verdict or an error dict),
    # which gr.JSON renders directly.
    output = gr.JSON(label="Audit Result")
    run.click(fn=run_audit, inputs=[sid, model], outputs=output)

# Uncomment to run CLI batch:
# batch_audit()

# NOTE(review): launch() runs unconditionally on import — presumably this
# file is a Space's app.py entry point; confirm before importing elsewhere.
demo.launch()