Spaces:

NguyNhu
/

yellowpages

Sleeping

App Files Files Community

NguyNhu committed on May 8, 2025

Commit

97cb158

verified ·

1 Parent(s): be40855

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -82

app.py CHANGED Viewed

@@ -3,97 +3,118 @@
 import gradio as gr
 import sys
 import os
 # Thêm đường dẫn thư mục hiện tại vào sys.path để Python có thể tìm thấy script scraper
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-# Nhớ thay thế 'yellow_pages_scraper' bằng tên file .py thật của script scraper của bạn
-# và đảm bảo hàm 'parse_listing(keyword, location)' tồn tại trong file đó.
 try:
-    # Import hàm cạo dữ liệu từ script chính của bạn
-    from yellow_pages_scraper import parse_listing
-    # Đánh dấu là import thành công để biết hàm cạo có sẵn
-    scraper_function_available = True
-except ModuleNotFoundError:
-    scraper_function_available = False
-    print("Lỗi: Không tìm thấy file 'yellow_pages_scraper.py' hoặc hàm 'parse_listing'.")
-    print("Vui lòng đảm bảo script cạo dữ liệu của bạn tên là 'yellow_pages_scraper.py'")
-    print("và chứa hàm 'parse_listing(keyword, location)' trong cùng thư mục.")
-    # Định nghĩa một hàm giả để ứng dụng Gradio vẫn chạy nhưng báo lỗi rõ ràng
-    def parse_listing(keyword, location):
-         return [{"error": "Scraper script file or function not found."}]
-except ImportError as e:
-     scraper_function_available = False
-     print(f"Lỗi Import từ script scraper: {e}")
-     def parse_listing(keyword, location):
-          return [{"error": f"Error importing from scraper script: {e}"}]
-def run_scraper(keyword, location):
     """
-    Hàm này nhận input từ Gradio, gọi script cạo dữ liệu và định dạng output.
     """
-    if not scraper_function_available:
-         return "Lỗi: Script cạo dữ liệu chưa được tải đúng cách. Vui lòng kiểm tra file và tên hàm."
-    if not keyword or not location:
-        return "Vui lòng nhập cả Từ khóa tìm kiếm và Địa điểm."
-    print(f"Đang cạo dữ liệu cho: Từ khóa='{keyword}', Địa điểm='{location}'")
-    try:
-        # Gọi hàm cạo dữ liệu từ script của bạn
-        # Giả định hàm parse_listing trả về một list các dictionary
-        scraped_data = parse_listing(keyword, location)
-        if not scraped_data or (len(scraped_data) == 1 and "error" in scraped_data[0]):
-            # Xử lý trường hợp không có kết quả hoặc có lỗi từ hàm parse_listing
-            if scraped_data and "error" in scraped_data[0]:
-                 return f"Script cạo dữ liệu báo lỗi: {scraped_data[0]['error']}"
-            return "Không tìm thấy kết quả nào cho tìm kiếm này hoặc script cạo dữ liệu không hoạt động đúng."
-        # Định dạng dữ liệu để hiển thị trong output text area của Gradio
-        output_text = "Kết quả cạo dữ liệu:\n\n"
-        for i, item in enumerate(scraped_data):
-            output_text += f"--- Doanh nghiệp {i+1} ---\n"
-            # Sử dụng .get() để tránh lỗi nếu một key không tồn tại trong dictionary
-            # Điều chỉnh các key này ('business_name', 'telephone', v.v.)
-            # để phù hợp với các key mà script scraper của bạn sử dụng trong dictionary kết quả
-            output_text += f"Tên: {item.get('business_name', 'N/A')}\n"
-            output_text += f"Điện thoại: {item.get('telephone', 'N/A')}\n"
-            output_text += f"Địa chỉ: {item.get('street', 'N/A')}, {item.get('locality', 'N/A')}, {item.get('region', 'N/A')} {item.get('zipcode', 'N/A')}\n"
-            output_text += f"Website: {item.get('website', 'N/A')}\n"
-            output_text += f"Hạng mục: {item.get('category', 'N/A')}\n"
-            output_text += f"Đánh giá: {item.get('rating', 'N/A')}\n"
-            output_text += f"Rank: {item.get('rank', 'N/A')}\n"
-            output_text += f"URL: {item.get('url', 'N/A')}\n" # Đảm bảo script scraper có cạo URL
-            output_text += "-----------------------\n\n"
-        return output_text
-    except Exception as e:
-        # Xử lý các lỗi không mong muốn khác
-        print(f"Đã xảy ra lỗi không xác định khi chạy scraper: {e}")
-        import traceback
-        traceback.print_exc() # In traceback để debug
-        return f"Đã xảy ra lỗi không xác định khi chạy scraper: {e}"
 # Tạo giao diện Gradio
-iface = gr.Interface(
-    fn=run_scraper, # Hàm sẽ được gọi khi nhấn nút
-    inputs=[
-        gr.Textbox(label="Từ khóa tìm kiếm (ví dụ: restaurants)"),
-        gr.Textbox(label="Địa điểm (ví dụ: Boston,MA)")
-    ],
-    outputs=gr.Textbox(label="Kết quả cạo dữ liệu", lines=20, max_lines=50), # Hiển thị kết quả trong Textbox lớn
-    title="Yellow Pages Scraper Demo",
-    description="Nhập từ khóa và địa điểm để chạy script cạo thông tin doanh nghiệp từ Yellow Pages.",
-    allow_flagging="never", # Tắt tính năng flagging nếu không cần thiết
-    analytics_enabled=False # Tắt analytics nếu không cần thiết
-)
-# Chạy ứng dụng Gradio
-# Trong Hugging Face Space, dòng này sẽ tự động được gọi
-# Khi chạy cục bộ, nó sẽ mở giao diện trong trình duyệt
-iface.launch()

 import gradio as gr
 import sys
 import os
+import subprocess # Để chạy các script như subprocess nếu chúng không có hàm dễ gọi
 # Thêm đường dẫn thư mục hiện tại vào sys.path để Python có thể tìm thấy script scraper
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+# --- Approach 1: import the scraper function directly (when the script exposes a callable) ---
+# The import below expects yellowpages.py to provide parse_listing_function(keyword, location).
 try:
+    from yellowpages import parse_listing_function # assumes yellowpages.py defines this name — TODO confirm
+    yp_scraper_available = True
+except ImportError:
+    yp_scraper_available = False
+    print("Cảnh báo: Không thể import hàm từ yellowpages.py. Chức năng này sẽ không hoạt động.")
+    # Fallback stub so the name still exists after a failed import; it returns a one-element list carrying an "error" key
+    def parse_listing_function(keyword, location):
+        return [{"error": "yellowpages.py scraper function not available."}]
+# --- Cách 2: Chạy script như một tiến trình con (Nếu script chỉ chạy từ command line) ---
+# Cách này phức tạp hơn vì cần bắt output từ subprocess
+def run_yellowpages_cli(keyword, location):
+    try:
+        # Thay thế 'python yellowpages.py' bằng lệnh chính xác để chạy script
+        # Cần xử lý output từ console để trả về cho Gradio
+        result = subprocess.run(
+            ["python", "yellowpages.py", keyword, location], # Đây là ví dụ, cần đúng cú pháp script
+            capture_output=True,
+            text=True,
+            check=True # Báo lỗi nếu script trả về mã lỗi khác 0
+        )
+        # Output thành công thường nằm trong stdout
+        # Cần phân tích cú pháp stdout nếu nó trả về dữ liệu cấu trúc (JSON, CSV in console)
+        # hoặc trả về nguyên văn nếu script in kết quả dễ đọc
+        return result.stdout
+    except subprocess.CalledProcessError as e:
+        return f"Lỗi khi chạy scraper từ command line: {e.stderr}"
+    except FileNotFoundError:
+         return "Lỗi: Không tìm thấy file yellowpages.py để chạy."
+# Hàm chính được gọi từ Gradio
+def scrape_data(scraper_choice, input1, input2=None):
     """
+    Hàm này nhận lựa chọn scraper và các input tương ứng.
     """
+    if scraper_choice == "Yellow Pages":
+        # Dùng Cách 1 (import hàm) nếu khả thi
+        if yp_scraper_available:
+            print(f"Chạy Yellow Pages với Keyword: {input1}, Location: {input2}")
+            scraped_data = parse_listing_function(input1, input2) # Gọi hàm đã import
+            # Định dạng kết quả từ list of dicts sang string
+            output_text = "Kết quả từ Yellow Pages:\n\n"
+            if not scraped_data or ("error" in scraped_data[0] if scraped_data else False):
+                return output_text + (scraped_data[0].get('error', 'Không có kết quả hoặc lỗi không xác định.') if scraped_data else 'Không có kết quả.')
+            for i, item in enumerate(scraped_data):
+                 output_text += f"--- {i+1} ---\n"
+                 # Thêm các key phù hợp với output của scraper yellowpages.py
+                 output_text += f"Name: {item.get('business_name', 'N/A')}\n"
+                 output_text += f"Phone: {item.get('telephone', 'N/A')}\n"
+                 output_text += f"Address: {item.get('street', 'N/A')}, {item.get('locality', 'N/A')}\n"
+                 output_text += f"Website: {item.get('website', 'N/A')}\n"
+                 output_text += f"Rating: {item.get('rating', 'N/A')}\n"
+                 output_text += f"URL: {item.get('url', 'N/A')}\n"
+                 output_text += "------------\n"
+            return output_text
+        else:
+             # Dùng Cách 2 (subprocess) nếu Cách 1 không được hoặc script chỉ chạy CLI
+             print(f"Chạy Yellow Pages CLI với Keyword: {input1}, Location: {input2}")
+             return run_yellowpages_cli(input1, input2) # Gọi hàm chạy subprocess
+    # Thêm các lựa chọn scraper khác tại đây
+    # elif scraper_choice == "Google Search":
+    #     # Cần import hàm hoặc chạy subprocess cho Google Search script
+    #     # Và xử lý input/output riêng
+    #     pass
+    # elif scraper_choice == "Amazon":
+    #     # Cần import hàm hoặc chạy subprocess cho Amazon script
+    #     # Và xử lý input/output riêng
+    #     pass
+    else:
+        return "Vui lòng chọn một scraper."
 # Tạo giao diện Gradio
+with gr.Blocks() as demo:
+    gr.Markdown("# Demo Các Web Scraper")
+    gr.Markdown("Chọn một scraper và nhập các thông tin cần thiết.")
+    scraper_dropdown = gr.Dropdown(
+        ["Yellow Pages"], # Add the names of other scrapers to this list
+        label="Chọn Scraper"
+    )
+    # Input fields. Their visibility could be adjusted based on scraper_dropdown
+    input_keyword = gr.Textbox(label="Từ khóa")
+    input_location = gr.Textbox(label="Địa điểm (ví dụ: Boston,MA)")
+    # input_url = gr.Textbox(label="URL", visible=False) # Example input for another scraper
+    run_button = gr.Button("Chạy Scraper")
+    output_text = gr.Textbox(label="Kết quả", lines=20)
+    # Wire the button: a click calls scrape_data and writes its return value to output_text
+    # The inputs list must match scrape_data's parameters in number and order
+    run_button.click(
+        scrape_data,
+        inputs=[scraper_dropdown, input_keyword, input_location], # Add further inputs if needed
+        outputs=output_text
+    )
+    # Further interactions could change which inputs are shown,
+    # driven by scraper_dropdown.change(...)
+demo.launch()