Spaces:

Pamudu13
/

deepseek-api

Paused

App Files Files Community

Pamudu13 committed on Feb 6, 2025

Commit

19a4a86

verified ·

1 Parent(s): 58c842e

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -252

app.py CHANGED Viewed

@@ -1,265 +1,40 @@
-from flask import Flask, jsonify, request
 import requests
-from bs4 import BeautifulSoup
 import os
-import re
-import urllib.parse
-import time
-import random
-import base64
-from io import BytesIO
-from urllib.parse import urlparse
-import html2text
 app = Flask(__name__)
-def search_images(query, num_images=5):
-    # Headers to mimic a browser request
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-        'Accept-Language': 'en-US,en;q=0.5',
-        'Accept-Encoding': 'gzip, deflate',
-        'DNT': '1',
-        'Connection': 'keep-alive',
-    }
-    # Format the query for URL
-    formatted_query = urllib.parse.quote(query)
-    # Google Images URL
-    url = f"https://www.google.com/search?q={formatted_query}&tbm=isch&safe=active"
     try:
-        # Get the HTML content
-        response = requests.get(url, headers=headers, timeout=30)
-        response.raise_for_status()
-        # Find all image URLs using regex
-        image_urls = re.findall(r'https?://[^"\']*?(?:jpg|jpeg|png|gif)', response.text)
-        # Remove duplicates while preserving order
-        image_urls = list(dict.fromkeys(image_urls))
-        # Store results
-        results = []
-        downloaded = 0
-        for img_url in image_urls:
-            if downloaded >= num_images:
-                break
-            try:
-                # Skip small thumbnails and icons
-                if 'gstatic.com' in img_url or 'google.com' in img_url:
-                    continue
-                # Download image
-                img_response = requests.get(img_url, headers=headers, timeout=10)
-                img_response.raise_for_status()
-                # Check if the response is actually an image
-                content_type = img_response.headers.get('Content-Type', '')
-                if not content_type.startswith('image/'):
-                    continue
-                # Convert image to base64
-                image_base64 = base64.b64encode(img_response.content).decode('utf-8')
-                # Add to results
-                results.append({
-                    'image_url': img_url,
-                    'base64_data': f"data:{content_type};base64,{image_base64}"
-                })
-                downloaded += 1
-                # Add a random delay between downloads
-                time.sleep(random.uniform(0.5, 1))
-            except Exception as e:
-                print(f"Error downloading image: {str(e)}")
-                continue
-        return results
-    except Exception as e:
-        print(f"An error occurred: {str(e)}")
-        return []
-@app.route('/search_images', methods=['GET'])
-def api_search_images():
-    try:
-        # Get query parameters
-        query = request.args.get('query', '')
-        num_images = int(request.args.get('num_images', 5))
-        if not query:
-            return jsonify({'error': 'Query parameter is required'}), 400
-        if num_images < 1 or num_images > 20:
-            return jsonify({'error': 'Number of images must be between 1 and 20'}), 400
-        # Search for images
-        results = search_images(query, num_images)
-        return jsonify({
-            'success': True,
-            'query': query,
-            'results': results
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'error': str(e)
-        }), 500
-def get_domain(url):
-    """Extract domain from URL"""
-    parsed_uri = urlparse(url)
-    return parsed_uri.netloc
-def clean_text(text):
-    """Clean scraped text"""
-    # Remove extra whitespace
-    text = re.sub(r'\s+', ' ', text)
-    # Remove special characters
-    text = re.sub(r'[^\w\s.,!?-]', '', text)
-    return text.strip()
-def scrape_website(url, headers):
-    """Scrape content from a single website"""
-    try:
-        response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
-        # Remove unwanted elements
-        for element in soup(['script', 'style', 'nav', 'footer', 'iframe']):
-            element.decompose()
-        # Convert HTML to text
-        h = html2text.HTML2Text()
-        h.ignore_links = True
-        h.ignore_images = True
-        text = h.handle(str(soup))
-        # Clean the text
-        text = clean_text(text)
-        # Get meta description
-        meta_desc = ''
-        meta_tag = soup.find('meta', attrs={'name': 'description'}) or soup.find('meta', attrs={'property': 'og:description'})
-        if meta_tag:
-            meta_desc = meta_tag.get('content', '')
-        # Get title
-        title = soup.title.string if soup.title else ''
-        return {
-            'title': clean_text(title),
-            'meta_description': clean_text(meta_desc),
-            'content': text[:10000],  # Limit content length
-            'url': url
         }
-    except Exception as e:
-        print(f"Error scraping {url}: {str(e)}")
-        return None
-def search_and_scrape(query, num_results=5):
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-        'Accept-Language': 'en-US,en;q=0.5',
-        'Accept-Encoding': 'gzip, deflate',
-        'DNT': '1',
-        'Connection': 'keep-alive',
-    }
-    # Format the query for URL
-    formatted_query = urllib.parse.quote(query)
-    # Google Search URL
-    url = f"https://www.google.com/search?q={formatted_query}&num={num_results}"
-    try:
-        # Get Google search results
-        response = requests.get(url, headers=headers, timeout=30)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
-        # Find all search result divs
-        search_results = []
-        result_divs = soup.find_all('div', class_='g')
-        for div in result_divs:
-            # Find the link
-            link = div.find('a')
-            if not link:
-                continue
-            href = link.get('href', '')
-            # Skip if not a valid URL or if it's a Google-related URL
-           # if not href.startswith('http') or 'google.' in href:
-           #     continue
-            # Add random delay between requests
-            time.sleep(random.uniform(1, 2))
-            # Scrape the website
-            site_data = scrape_website(href, headers)
-            if site_data:
-                search_results.append(site_data)
-            if len(search_results) >= num_results:
-                break
-        return search_results
     except Exception as e:
-        print(f"An error occurred: {str(e)}")
-        return []
-@app.route('/scrape_sites', methods=['GET'])
-def api_scrape_sites():
-    try:
-        # Get query parameters
-        query = request.args.get('query', '')
-        num_results = int(request.args.get('num_results', 5))
-        if not query:
-            return jsonify({'error': 'Query parameter is required'}), 400
-        if num_results < 1 or num_results > 10:
-            return jsonify({'error': 'Number of results must be between 1 and 10'}), 400
-        # Search and scrape sites
-        results = search_and_scrape(query, num_results)
-        return jsonify({
-            'success': True,
-            'query': query,
-            'results': results
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'error': str(e)
-        }), 500
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5000)

+from flask import Flask, request, jsonify
 import requests
 import os
 app = Flask(__name__)
+API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/deepseek-coder-6.7b-base"
+HEADERS = {"Authorization": f"Bearer {os.getenv('HUGGING_FACE_API_KEY')}"}
+@app.route('/', methods=['GET'])
+def generate_text():
     try:
+        # Get the user's prompt from query parameters
+        user_prompt = request.args.get('prompt', 'What is the capital of France?')
+        # Prepare the payload
+        payload = {
+            "inputs": user_prompt,
+            "parameters": {
+                "max_new_tokens": 500,
+                "temperature": 0.7,
+                "top_p": 0.95
+            }
         }
+        # Make request to Hugging Face API
+        response = requests.post(API_URL, headers=HEADERS, json=payload)
+        response.raise_for_status()  # Raise an exception for bad status codes
+        # Extract the generated text from response
+        generated_text = response.json()[0]['generated_text']
+        return jsonify({'response': generated_text})
     except Exception as e:
+        print(f"Exception occurred: {e}")
+        return jsonify({'error': 'An unexpected error occurred.'}), 500
 if __name__ == '__main__':
+    app.run(debug=True)