Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,8 +3,8 @@ import requests
|
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
from transformers import pipeline
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
llm = pipeline("text2text-generation", model="
|
| 8 |
|
| 9 |
def refine_from_url(url, instruction):
|
| 10 |
try:
|
|
@@ -21,10 +21,10 @@ You are a data refinement agent. Given the following webpage content, do the fol
|
|
| 21 |
Instruction: {instruction}
|
| 22 |
|
| 23 |
Content:
|
| 24 |
-
{raw_text[:
|
| 25 |
"""
|
| 26 |
|
| 27 |
-
output = llm(prompt, max_new_tokens=
|
| 28 |
return output
|
| 29 |
except Exception as e:
|
| 30 |
return f"Error: {str(e)}"
|
|
@@ -35,9 +35,9 @@ demo = gr.Interface(
|
|
| 35 |
gr.Textbox(label="🔗 Enter Webpage URL"),
|
| 36 |
gr.Textbox(label="🧠 Instruction", placeholder="e.g. Clean and format this for GPT2 training")
|
| 37 |
],
|
| 38 |
-
outputs=gr.Textbox(label="📄 Refined JSONL Output"),
|
| 39 |
title="🧠 Link-Based Data Refiner + Q&A Generator",
|
| 40 |
-
description="Paste any webpage link. This app will crawl, refine, and generate question-answer pairs using
|
| 41 |
)
|
| 42 |
|
| 43 |
if __name__ == "__main__":
|
|
|
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
from transformers import pipeline
|
| 5 |
|
| 6 |
+
# Use faster, open-access model
|
| 7 |
+
llm = pipeline("text2text-generation", model="google/flan-t5-base")
|
| 8 |
|
| 9 |
def refine_from_url(url, instruction):
|
| 10 |
try:
|
|
|
|
| 21 |
Instruction: {instruction}
|
| 22 |
|
| 23 |
Content:
|
| 24 |
+
{raw_text[:3000]}
|
| 25 |
"""
|
| 26 |
|
| 27 |
+
output = llm(prompt, max_new_tokens=512)[0]["generated_text"]
|
| 28 |
return output
|
| 29 |
except Exception as e:
|
| 30 |
return f"Error: {str(e)}"
|
|
|
|
| 35 |
gr.Textbox(label="🔗 Enter Webpage URL"),
|
| 36 |
gr.Textbox(label="🧠 Instruction", placeholder="e.g. Clean and format this for GPT2 training")
|
| 37 |
],
|
| 38 |
+
outputs=gr.Textbox(label="📄 Refined JSONL Output", lines=30, max_lines=60),
|
| 39 |
title="🧠 Link-Based Data Refiner + Q&A Generator",
|
| 40 |
+
description="Paste any webpage link. This app will crawl, refine, and generate question-answer pairs using Flan-T5."
|
| 41 |
)
|
| 42 |
|
| 43 |
if __name__ == "__main__":
|