ghosthets committed on
Commit
502e656
·
verified ·
1 Parent(s): 7d512b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -3,8 +3,8 @@ import requests
3
  from bs4 import BeautifulSoup
4
  from transformers import pipeline
5
 
6
- # Load open-access LLM
7
- llm = pipeline("text2text-generation", model="deepseek-ai/deepseek-coder-6.7b-instruct")
8
 
9
  def refine_from_url(url, instruction):
10
  try:
@@ -21,10 +21,10 @@ You are a data refinement agent. Given the following webpage content, do the fol
21
  Instruction: {instruction}
22
 
23
  Content:
24
- {raw_text[:6000]}
25
  """
26
 
27
- output = llm(prompt, max_new_tokens=1024)[0]["generated_text"]
28
  return output
29
  except Exception as e:
30
  return f"Error: {str(e)}"
@@ -35,9 +35,9 @@ demo = gr.Interface(
35
  gr.Textbox(label="🔗 Enter Webpage URL"),
36
  gr.Textbox(label="🧠 Instruction", placeholder="e.g. Clean and format this for GPT2 training")
37
  ],
38
- outputs=gr.Textbox(label="📄 Refined JSONL Output"),
39
  title="🧠 Link-Based Data Refiner + Q&A Generator",
40
- description="Paste any webpage link. This app will crawl, refine, and generate question-answer pairs using DeepSeek LLM."
41
  )
42
 
43
  if __name__ == "__main__":
 
3
  from bs4 import BeautifulSoup
4
  from transformers import pipeline
5
 
6
+ # Use faster, open-access model
7
+ llm = pipeline("text2text-generation", model="google/flan-t5-base")
8
 
9
  def refine_from_url(url, instruction):
10
  try:
 
21
  Instruction: {instruction}
22
 
23
  Content:
24
+ {raw_text[:3000]}
25
  """
26
 
27
+ output = llm(prompt, max_new_tokens=512)[0]["generated_text"]
28
  return output
29
  except Exception as e:
30
  return f"Error: {str(e)}"
 
35
  gr.Textbox(label="🔗 Enter Webpage URL"),
36
  gr.Textbox(label="🧠 Instruction", placeholder="e.g. Clean and format this for GPT2 training")
37
  ],
38
+ outputs=gr.Textbox(label="📄 Refined JSONL Output", lines=30, max_lines=60),
39
  title="🧠 Link-Based Data Refiner + Q&A Generator",
40
+ description="Paste any webpage link. This app will crawl, refine, and generate question-answer pairs using Flan-T5."
41
  )
42
 
43
  if __name__ == "__main__":