Spaces:
Running
Running
| import requests | |
| from bs4 import BeautifulSoup | |
| from fastapi import FastAPI#, Request | |
| from fastapi.responses import StreamingResponse | |
| from pydantic import BaseModel | |
| import re | |
| import replicate | |
| import os | |
| import json | |
| from supabase import create_client, Client | |
# Supabase connection settings come from the environment.
# NOTE(review): create_client will receive None if DB_URL / DB_KEY are unset —
# confirm the deployment always provides both, otherwise this fails at import.
url: str = os.environ.get("DB_URL")
key: str = os.environ.get("DB_KEY")
supabase: Client = create_client(url, key)
class Item(BaseModel):
    """Request body shared by the summarize/content endpoints."""
    # Article page to fetch and summarize.
    url: str
    # Desired summary length in lines (used in the chat-completions prompt).
    max_tokens: int
app = FastAPI()  # ASGI application instance served by the platform
def extract_article_content(url):
    """Fetch *url* and return the concatenated text of its <h1>/<p> tags.

    Best-effort: any network or parsing failure returns an empty string,
    which callers treat as "no article found".
    """
    try:
        # Timeout prevents a hung remote server from blocking the worker
        # forever (the original call had no timeout at all).
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        results = soup.find_all(['h1', 'p'])
        return ' '.join(result.text for result in results)
    except Exception:
        # Deliberate silent fallback — callers check for "".
        return ""
async def root():
    """Liveness probe: report that the service is running."""
    payload = {"status": "OK"}
    return payload
async def root(item: Item):
    """Summarize the article at item.url with the Replicate Arctic model.

    Returns the full summary text, or {'summary': ""} when no article
    text could be extracted from the page.
    """
    try:
        article = extract_article_content(item.url)
        if not article:
            return {'summary': ""}
        model_input = {
            "prompt": "summarize this following news article:" + article,
            "temperature": 0.2,
        }
        # Drain the model's event stream and join the pieces into one string.
        pieces = (
            str(event)
            for event in replicate.stream(
                "snowflake/snowflake-arctic-instruct", input=model_input
            )
        )
        return "".join(pieces)
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
| # @app.post("/summarize-v2") | |
| # async def root(item: Item): | |
| # try: | |
| # article = extract_article_content(item.url) | |
| # if len(article) == 0: | |
| # return {'summary': ""} | |
| # def event_generator(): | |
| # for event in replicate.stream("snowflake/snowflake-arctic-instruct", input={ | |
| # "prompt": f"summarize this news article in {item.max_tokens} lines:" + article, | |
| # "temperature": 0.2, | |
| # "max_new_tokens" : 1000 | |
| # }): | |
| # # Yield the event as a string | |
| # yield str(event) | |
| # #print(str(event), end="") | |
| # # Use StreamingResponse to stream the events | |
| # return StreamingResponse(event_generator(), media_type='text/event-stream') | |
| # except requests.RequestException as e: | |
| # return {"error": str(e), "status_code": 500} | |
async def root(item: Item):
    """Summarize the article at item.url via an external chat-completions
    API and stream the model output back as text/event-stream.

    Returns {'summary': ""} when no article text could be extracted.
    """
    try:
        article = extract_article_content(item.url)
        if len(article) == 0:
            return {'summary': ""}
        api_url = 'https://yashxx07-hf-llm-api.hf.space/api/v1/chat/completions'
        headers = {"content-type": "application/json"}
        data = {
            "model": "mixtral-8x7b",
            "messages": [
                {
                    "role": "user",
                    "content": f"summarize this news article in {item.max_tokens} lines:" + article
                }
            ],
            "temperature": 0.5,
            "top_p": 0.95,
            "max_tokens": -1,
            "use_cache": False,
            "stream": True
        }

        def get_stream(url):
            # Server-sent events: each non-empty line looks like
            # "data: {json}"; the final line is "data: [DONE]".
            s = requests.Session()
            with s.post(url, headers=headers, stream=True, json=data, timeout=60) as resp:
                for line in resp.iter_lines():
                    if not line:
                        continue
                    try:
                        # BUG FIX: json.loads previously ran *outside* the
                        # try block, so the "[DONE]" sentinel (not valid
                        # JSON) raised an uncaught JSONDecodeError and
                        # killed the stream mid-response.
                        obj = json.loads(line[5:])
                        output = obj["choices"][0]["delta"]["content"]
                    except (ValueError, KeyError, IndexError):
                        # Skip sentinel / keep-alive / malformed chunks.
                        continue
                    yield str(output)

        # Use StreamingResponse to stream the events
        return StreamingResponse(get_stream(api_url), media_type='text/event-stream')
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
async def root(item: Item):
    """Return the raw text extracted from the article at item.url."""
    try:
        article = extract_article_content(item.url)
        if not article:
            return {'ERROR': "AHHHHHHHHH"}
        return {"content": article}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
async def insert_image(url , id):
    # Persist the scraped image URL onto the matching 'news' row.
    # NOTE(review): `supabase` is annotated as the *sync* Client above, and
    # the other call sites invoke .execute() without `await` — confirm that
    # awaiting here (and the `data, count` unpacking) matches the installed
    # supabase-py version; on the sync client this likely raises TypeError.
    data, count = await supabase.table('news').update({'image_url': url}).eq('id', id).execute()
async def root(site: str = 'abcnews.go'):
    """Backfill image_url for up to 15 recent image-less articles from
    www.{site}.com by scraping each article page for its second <img>.

    Returns {"RESULTS": [...]} — the list of image URLs that were written.
    """
    try:
        ii_list = []
        response = supabase.table('news').select("*").eq('source', f'www.{site}.com').is_('image_url', 'null').order('published_date', desc=True).limit(15).execute()
        # Iterate rows directly instead of range(len(...)); keep a distinct
        # name for the scraped image URL (the original shadowed `url`).
        for row in response.data:
            article_url = row['article_url']
            try:
                res = requests.get(article_url, timeout=10)
                soup = BeautifulSoup(res.text, 'html.parser')
                images = soup.find_all(['img'])
                # Index 1 skips the site logo — layout-dependent heuristic.
                image_url = images[1]['src']
                ii_list.append(image_url)
                await insert_image(image_url, row['id'])
            except Exception as e:
                # Best-effort per article: log and continue with the rest.
                print(e)
        return {"RESULTS": ii_list}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}
async def root(site: str = 'livemint'):
    """Backfill image_url for up to 15 recent image-less www.livemint.com
    articles by scraping each article page.

    NOTE: the `site` parameter is currently unused — the source filter is
    hard-coded to www.livemint.com; it is kept for interface compatibility.
    Returns {"RESULTS": [...]} — the list of image URLs that were written.
    """
    try:
        ii_list = []
        response = supabase.table('news').select("*").eq('source', 'www.livemint.com').is_('image_url', 'null').order('published_date', desc=True).limit(15).execute()
        for row in response.data:
            article_url = row['article_url']
            try:
                res = requests.get(article_url, timeout=10)
                soup = BeautifulSoup(res.text, 'html.parser')
                images = soup.find_all(['img'])
                # Index 18 targets the lead image on livemint pages —
                # fragile, layout-dependent heuristic.
                image_url = images[18]['src']
                ii_list.append(image_url)
                await insert_image(image_url, row['id'])
            except Exception as e:
                # Best-effort per article: log and continue with the rest.
                print(e)
        return {"RESULTS": ii_list}
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}