Spaces:
Paused
Paused
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,181 +1,20 @@
|
|
| 1 |
-
from
|
| 2 |
-
from half_json.core import JSONFixer
|
| 3 |
-
from openai import OpenAI
|
| 4 |
-
from retry import retry
|
| 5 |
-
import re
|
| 6 |
-
from dotenv import load_dotenv
|
| 7 |
-
import os
|
| 8 |
-
from fastapi import FastAPI
|
| 9 |
-
from fastapi import Query
|
| 10 |
from pydantic import BaseModel
|
| 11 |
-
|
| 12 |
-
from helper_functions_api import md_to_html
|
| 13 |
-
from duckduckgo_search import DDGS
|
| 14 |
-
import time
|
| 15 |
|
| 16 |
-
|
| 17 |
-
# --- Provider credentials (read from the environment; .env loaded via dotenv) ---
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
# The stored secret apparently omits Groq's "gsk_" prefix, so it is re-added here
# (TODO confirm against the deployment secrets). Guard against a missing variable:
# the previous `"gsk_" + os.getenv(...)` raised TypeError at import time whenever
# GROQ_API_KEY was unset, which crashed the whole app before FastAPI could start.
_groq_key = os.getenv("GROQ_API_KEY")
GROQ_API_KEY = "gsk_" + _groq_key if _groq_key else None
HELICON_API_KEY = os.getenv("HELICON_API_KEY")

# --- System prompts: each selects the assistant persona for a call site ---
SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
# Forces the model to reply with a bare Python list (parsed downstream by json_from_text).
SysPromptList = "You are now in the role of an expert AI who can extract structured information from user request. All elements must be in double quotes. You must respond ONLY with a valid python List. Do not add any additional comments."
# Forces the model to reply with a JSON object.
SysPromptJson = "You are now in the role of an expert AI who can extract structured information from user request. Both key and value pairs must be in double quotes. You must respond ONLY with a valid JSON file. Do not add any additional comments."
# Markdown report built from caller-supplied context.
SysPromptMd = "You are an expert AI who can create a structured report using information provided in the context from user request.The report should be in markdown format consists of markdown tables structured into subtopics. Do not add any additional comments."
# Markdown report built from the model's own knowledge (no context supplied).
SysPromptMdOffline = "You are an expert AI who can create a structured report using your knowledge on user request.The report should be in markdown format consists of markdown tables/lists/paragraphs as needed, structured into subtopics. Do not add any additional comments."
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
prompt_topics = """
|
| 29 |
-
You are an expert data analyst. You have been providing data analysis and structuring services for over 15 years. You specialize in creating detailed and comprehensive lists of subtopics for various fields.
|
| 30 |
-
|
| 31 |
-
Your task is to create a list of essential subtopics along with their descriptions based on the given USER_QUERY. The objective is to generate a detailed and precise list of subtopics that can help in understanding the main topic thoroughly.
|
| 32 |
-
|
| 33 |
-
Follow these steps to complete the task:
|
| 34 |
-
|
| 35 |
-
1. Identify and list 2 to {num_topics} essential subtopics related to {user_input}.
|
| 36 |
-
2. Provide a detailed description for each subtopic explaining its significance and relevance to the main topic.
|
| 37 |
-
3. Format your response as a valid Python list of lists, where each sub-list contains the subtopic and its description.
|
| 38 |
-
|
| 39 |
-
Make sure your response is well-organized and provides comprehensive details for each subtopic.
|
| 40 |
-
|
| 41 |
-
Take a deep breath and work on this problem step-by-step.
|
| 42 |
-
|
| 43 |
-
output format
|
| 44 |
-
[
|
| 45 |
-
["Subtask Title 1", "Detailed description of subtask 1."],
|
| 46 |
-
["Subtask Title 2", "Detailed description of subtask 2."],
|
| 47 |
-
...
|
| 48 |
-
]
|
| 49 |
-
YOUR OUTPUT SHOULD CONSIST ONLY A VALID PYTHON LIST, DO NOT ADD ADDITIONAL COMMENTS
|
| 50 |
-
"""
|
| 51 |
-
|
| 52 |
-
prompt_subtopics = """You are a professional task manager and prompt engineer. You have been helping teams and individuals decompose complex tasks into actionable subtasks for over 20 years. Your expertise lies in breaking down intricate tasks into clear, manageable steps and ensuring that all relevant aspects are covered while excluding any specified topics.**
|
| 53 |
-
|
| 54 |
-
Objective: Help create 2 to {num_topics} subtasks for an LLM to perform the specified task in the context of the given user query. Ensure that the generated subtasks are precise, actionable, and detailed. Exclude the specified topics from the subtasks.
|
| 55 |
-
|
| 56 |
-
**Steps to complete the task:**
|
| 57 |
-
|
| 58 |
-
1. **Understand the Main Task and User Query:**
|
| 59 |
-
- Read the main task and user query carefully to grasp the core objectives and context.
|
| 60 |
-
- Identify any specific requirements or constraints mentioned in the query.
|
| 61 |
-
|
| 62 |
-
2. **Identify Key Components:**
|
| 63 |
-
- Break down the main task into its fundamental components.
|
| 64 |
-
- Ensure each component is essential to the overall goal and can be clearly defined.
|
| 65 |
-
|
| 66 |
-
3. **Create Detailed Subtasks:**
|
| 67 |
-
- For each key component, create 2 to 5 detailed subtasks.
|
| 68 |
-
- Ensure each subtask is actionable and provides clear instructions on what needs to be done.
|
| 69 |
-
- Maintain logical order and ensure that the completion of each subtask contributes to the overall objective.
|
| 70 |
-
|
| 71 |
-
4. **Exclude Specified Topics:**
|
| 72 |
-
- Review the list of topics to be excluded.
|
| 73 |
-
- Ensure that none of the generated subtasks include these topics.
|
| 74 |
-
|
| 75 |
-
5. **Format the Response:**
|
| 76 |
-
- Present the subtasks in a structured Python list of lists format.
|
| 77 |
-
- Each sub-list should contain the subtask title and its detailed description.
|
| 78 |
-
|
| 79 |
-
Output Format:
|
| 80 |
-
[
|
| 81 |
-
["Subtask Title 1", "Detailed description of subtask 1."],
|
| 82 |
-
["Subtask Title 2", "Detailed description of subtask 2."],
|
| 83 |
-
...
|
| 84 |
-
]
|
| 85 |
-
|
| 86 |
-
MAIN TASK: {main_task}
|
| 87 |
-
USER QUERY:{user_input}
|
| 88 |
-
TOPICS TO BE EXCLUDED:{excluded_topics}
|
| 89 |
-
YOUR OUTPUT SHOULD CONSIST ONLY A VALID PYTHON LIST, DO NOT ADD ADDITIONAL COMMENTS
|
| 90 |
-
"""
|
| 91 |
-
### ------LLM CONFIG-------- ###

# Both clients use the OpenAI SDK pointed at *.hconeai.com base URLs, i.e.
# requests are presumably proxied through Helicone for logging/observability
# (the Helicone-Auth header supports that reading — TODO confirm).
together_client = OpenAI(
    api_key=TOGETHER_API_KEY,
    base_url="https://together.hconeai.com/v1",
    default_headers={ "Helicone-Auth": f"Bearer {HELICON_API_KEY}"})

# Primary provider: Groq (used first by together_response, with Together as fallback).
groq_client = OpenAI(
    api_key=GROQ_API_KEY,
    base_url="https://groq.hconeai.com/openai/v1",
    default_headers={ "Helicone-Auth": f"Bearer {HELICON_API_KEY}"})

# Groq model names
llm_default_small = "llama3-8b-8192"
llm_default_medium = "llama3-70b-8192"

# Together Model names (fallback)
# Index-matched to the Groq names above: small maps to small, medium to medium.
llm_fallback_small = "meta-llama/Llama-3-8b-chat-hf"
llm_fallback_medium = "meta-llama/Llama-3-70b-chat-hf"

### ------END OF LLM CONFIG-------- ###
|
| 112 |
-
|
| 113 |
-
def together_response(message, model = llm_default_small, SysPrompt = SysPromptDefault, temperature=0.2, frequency_penalty =0.1, max_tokens= 2000):
    """Run one chat completion against Groq, falling back to Together on error.

    Returns the assistant message text. Any exception from the Groq call is
    logged and retried once against Together using the equivalent fallback
    model; a failure there propagates to the caller.
    """
    chat = [
        {"role": "system", "content": SysPrompt},
        {"role": "user", "content": message},
    ]
    request_kwargs = {
        "model": model,
        "messages": chat,
        "temperature": temperature,
        "frequency_penalty": frequency_penalty,
        "max_tokens": max_tokens,
    }
    try:
        completion = groq_client.chat.completions.create(**request_kwargs)
        return completion.choices[0].message.content
    except Exception as e:
        # Primary provider failed — swap in the matching Together model and retry once.
        print(f"Error calling GROQ API: {e}")
        if model == llm_default_small:
            request_kwargs["model"] = llm_fallback_small
        else:
            request_kwargs["model"] = llm_fallback_medium
        completion = together_client.chat.completions.create(**request_kwargs)
        return completion.choices[0].message.content
|
| 132 |
|
|
|
|
|
|
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
Extracts JSON from text using regex and fuzzy JSON loading.
|
| 137 |
-
"""
|
| 138 |
try:
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
else:
|
| 145 |
-
json_out = text
|
| 146 |
-
# Use Fuzzy JSON loading
|
| 147 |
-
return loads(json_out)
|
| 148 |
-
|
| 149 |
-
@retry(tries=3, delay=0.5)
def generate_topics(user_input, num_topics, previous_queries):
    # Build the subtopics prompt and ask the medium model for a Python list of
    # [title, description] pairs; retried because the LLM occasionally returns
    # output that json_from_text cannot parse. `previous_queries` is accepted
    # for caller compatibility but not used here.
    filled_prompt = prompt_topics.format(user_input=user_input, num_topics=num_topics)
    raw_reply = together_response(
        filled_prompt, model=llm_default_medium, SysPrompt=SysPromptList, temperature=1
    )
    return json_from_text(raw_reply)
|
| 155 |
-
|
| 156 |
-
@retry(tries=3, delay=0.5)
def generate_subtopics(main_task,user_input,num_topics,excluded_topics):
    # Decompose `main_task` into subtasks, steering the model away from the
    # excluded topics. Retried because the LLM's list output is occasionally
    # unparseable.
    exclusions_csv = ",".join(excluded_topics)
    filled_prompt = prompt_subtopics.format(
        main_task=main_task,
        user_input=user_input,
        num_topics=num_topics,
        excluded_topics=exclusions_csv,
    )
    raw_reply = together_response(
        filled_prompt, model=llm_default_medium, SysPrompt=SysPromptList, temperature=1
    )
    return json_from_text(raw_reply)
|
| 163 |
-
|
| 164 |
-
@retry(tries=3, delay=0.5)
def generate_report(topic, description):
    # Ask the medium model for an offline (no-context) markdown report, then
    # convert it to HTML for the API response.
    report_request = f"create a detailed report on: {topic} by following the instructions: {description}"
    markdown_body = together_response(
        report_request, model=llm_default_medium, SysPrompt=SysPromptMdOffline
    )
    return md_to_html(markdown_body)
|
| 169 |
-
|
| 170 |
-
@retry(tries=3, delay=0.5)
def get_images(query, num_results):
    # Image search via DuckDuckGo. The short sleep spaces out requests
    # (presumably to avoid rate limiting — combined with @retry for flaky calls).
    time.sleep(0.5)
    return DDGS().images(keywords=query, safesearch="on", max_results=num_results)
|
| 176 |
-
|
| 177 |
-
# Define the app
|
| 178 |
-
app = FastAPI()
|
| 179 |
|
| 180 |
app.add_middleware(
|
| 181 |
CORSMiddleware,
|
|
@@ -185,69 +24,6 @@ app.add_middleware(
|
|
| 185 |
allow_headers=["*"],
|
| 186 |
)
|
| 187 |
|
| 188 |
-
# Create a Pydantic model to handle the input data
# NOTE(review): these fields use fastapi.Query as defaults inside request-body
# models; pydantic.Field is the conventional choice — confirm the generated
# OpenAPI schema looks right before changing it.
# Body for POST /generate_topics.
class TopicInput(BaseModel):
    user_input: str = Query(default="market research", description="input query to generate subtopics")
    num_topics: int = Query(default=5, description="Number of subtopics to generate (default: 5)")
    previous_queries: list[str] = Query(default=[], description="Deprecated: Use /generate_subtopics instead for subtopics")

# Body for POST /generate_subtopics.
class SubTopicInput(BaseModel):
    main_task: str = Query(default="detailed market research", description="Main task to be completed")
    user_input: str = Query(default="I want to start a business in retail", description="input query to generate subtopics")
    num_topics: int = Query(default=3, description="Number of max subtopics to generate (default: 3)")
    excluded_topics: list[str] = Query(default=[], description="List all other main tasks to exclude")

# Body for POST /get_images. (Name breaks PascalCase but is part of the API surface.)
class imageInput(BaseModel):
    user_input: str = Query(default="market research", description="input query to generate subtopics")
    num_images: int = Query(default=5, description="Number of subtopics to generate (default: 5)")

# Body for POST /generate_report.
class ReportInput(BaseModel):
    topic: str = Query(default="market research",description="The main topic for the report")
    description: str = Query(default="",description="A brief description of the topic")

# Body for POST /get_recommendations.
class RecommendationInput(BaseModel):
    user_input: str = Query(default="", description="Input query to generate follow-up questions")
    num_recommendations: int = Query(default=5, description="Number of recommendations to generate")
|
| 211 |
-
|
| 212 |
# Landing route: points visitors at the interactive Swagger docs.
@app.get("/", tags=["Home"])
def api_home():
    welcome = 'Welcome to FastAPI Subtopics API! Visit https://pvanand-generate-subtopics.hf.space/docs to test'
    return {'detail': welcome}
|
| 215 |
-
|
| 216 |
-
# POST /generate_topics: wrap the retrying LLM helper and label its output.
@app.post("/generate_topics")
async def create_topics(input: TopicInput):
    return {
        "topics": generate_topics(
            input.user_input, input.num_topics, input.previous_queries
        )
    }
|
| 220 |
-
|
| 221 |
-
# POST /generate_subtopics: decompose a main task, honoring exclusions.
@app.post("/generate_subtopics")
async def create_subtopics(input: SubTopicInput):
    subtask_list = generate_subtopics(
        input.main_task, input.user_input, input.num_topics, input.excluded_topics
    )
    return {"subtopics": subtask_list}
|
| 225 |
-
|
| 226 |
-
# POST /generate_report: LLM-written markdown report rendered to HTML.
@app.post("/generate_report")
async def create_report(input: ReportInput):
    return {"report": generate_report(input.topic, input.description)}
|
| 230 |
-
|
| 231 |
-
# POST /get_images: DuckDuckGo image search results, passed through as-is.
@app.post("/get_images")
async def fetch_images(input: imageInput):
    return {"images": get_images(input.user_input, input.num_images)}
|
| 235 |
-
|
| 236 |
-
# POST /get_recommendations: suggest follow-up questions. With a seed query we
# ask for likely next questions; with an empty one we ask for generic starters.
@app.post("/get_recommendations")
async def generate_recommendations(input: RecommendationInput):
    if input.user_input:
        question_prompt = f"""create a list of {input.num_recommendations} questions that a user might ask following the question: {input.user_input}:"""
    else:
        question_prompt = f"""create a list of mixed {input.num_recommendations} questions to create a report or plan or course on any of the topics product,market,research topic """

    llm_reply = together_response(
        question_prompt, model=llm_default_small, SysPrompt=SysPromptList, temperature=1
    )
    return {"recommendations": json_from_text(llm_reply)}
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
+
import hrequests
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
app = FastAPI()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# Request body for POST /scrape.
class URLRequest(BaseModel):
    # Page to fetch; presumably an absolute URL including scheme — no
    # validation is applied here (TODO confirm whether pydantic's AnyHttpUrl
    # should be used instead).
    url: str
|
| 9 |
|
| 10 |
+
# POST /scrape: fetch the requested page and return its raw body text.
# Fetch failures and non-2xx statuses are surfaced to the caller as HTTP 400
# with the underlying error message.
@app.post("/scrape")
async def scrape(url_request: URLRequest):
    try:
        page = hrequests.get(url_request.url)
        page.raise_for_status()  # Raise an HTTPError for bad responses
        return {"content": page.text}
    except hrequests.exceptions.RequestException as err:
        # NOTE(review): confirm hrequests actually exposes
        # exceptions.RequestException — if the fetch raises a different type,
        # this handler is bypassed and the client receives a 500 instead.
        raise HTTPException(status_code=400, detail=str(err))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
app.add_middleware(
|
| 20 |
CORSMiddleware,
|
|
|
|
| 24 |
allow_headers=["*"],
|
| 25 |
)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Landing route for the scraper service.
@app.get("/", tags=["Home"])
def api_home():
    # NOTE(review): this message still advertises the "Subtopics API" and its
    # docs URL, but this file only exposes /scrape — looks copy-pasted from the
    # previous app; confirm and update the text/URL.
    return {'detail': 'Welcome to FastAPI Subtopics API! Visit https://pvanand-generate-subtopics.hf.space/docs to test'}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|