Update main2.py
main2.py
CHANGED
@@ -1,28 +1,102 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jan  4 05:56:28 2023
+
+@author: dreji18
+"""
+
+# loading the packages
+from rake_nltk import Rake
+import wikipedia
+from rank_bm25 import BM25Okapi
+import torch
+from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
 from fastapi import FastAPI
-from transformers import pipeline
-
-# Create a new FastAPI app instance
 app = FastAPI()
-
-
-
-
-
-
-
-# Define a function to handle the GET request at `/generate`
-# The generate() function is defined as a FastAPI route that takes a
-# string parameter called text. The function generates text based on the input using the pipeline() object, and returns a JSON response
-# containing the generated text under the key "output"
-@app.get("/generate")
-def generate(text: str):
+
+@app.get("/")
+def read_root():
+    return {"Hello": "World"}
+
+# keyword extraction function
+def keyword_extractor(query):
     """
-
-
-
+    Rake has some useful features:
+    1. converts text to lower case automatically
+    2. extracts important key phrases
+    3. extracts combined phrases as well (e.g. Deep Learning, Capital City)
     """
-
-
-
-
-
+    r = Rake()  # uses English stopwords from NLTK and all punctuation characters
+    r.extract_keywords_from_text(query)
+    keywords = r.get_ranked_phrases()  # keyword phrases ranked highest to lowest
+    return keywords
+
+# data collection using wikipedia
+def data_collection(search_words):
+    """wikipedia"""
+    search_query = ' '.join(search_words)
+    wiki_pages = wikipedia.search(search_query, results=5)
+
+    information_list = []
+    pages_list = []
+    for i in wiki_pages:
+        try:
+            info = wikipedia.summary(i)
+            if any(word in info.lower() for word in search_words):
+                information_list.append(info)
+                pages_list.append(i)
+        except Exception:
+            pass
+
+    original_info = information_list
+    information_list = [item[:1000] for item in information_list]  # truncate each summary to 1000 characters to keep it within the 512-token model limit
+
+    return information_list, pages_list, original_info
+
+# document ranking function
+def document_ranking(documents, query, n):
+    """BM25"""
+    try:
+        tokenized_corpus = [doc.split(" ") for doc in documents]
+        bm25 = BM25Okapi(tokenized_corpus)
+        tokenized_query = query.split(" ")
+        doc_scores = bm25.get_scores(tokenized_query)
+        datastore = bm25.get_top_n(tokenized_query, documents, n)
+    except Exception:
+        datastore = []  # fall back to no documents instead of leaving datastore unbound
+    return datastore
+
+def qna(context, question):
+    """DistilBert"""
+    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', return_token_type_ids=True)
+    model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased-distilled-squad', return_dict=False)
+    encoding = tokenizer.encode_plus(question, context)
+    input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
+    start_scores, end_scores = model(torch.tensor([input_ids]), attention_mask=torch.tensor([attention_mask]))
+    ans_tokens = input_ids[torch.argmax(start_scores): torch.argmax(end_scores) + 1]
+    answer_tokens = tokenizer.convert_ids_to_tokens(ans_tokens, skip_special_tokens=True)
+    answer_tokens_to_string = tokenizer.convert_tokens_to_string(answer_tokens)
+
+    return answer_tokens_to_string
+
+@app.get("/predict")
+def answergen(search_string: str):
+    try:
+        keyword_list = keyword_extractor(search_string)
+        information, pages, original_data = data_collection(keyword_list)
+        datastore = document_ranking(information, search_string, 3)
+
+        answers_list = []
+        for i in range(len(datastore)):
+            result = qna(datastore[i], search_string)
+            answers_list.append(result)
+
+        return {"answer 1": answers_list[0],
+                "answer 2": answers_list[1],
+                "answer 3": answers_list[2]}
+    except Exception:
+        return {"error": "sorry, couldn't process the request"}
+
+#uvicorn main2:app --port 8000 --reload
+
+#%%
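For quick verification of the new routes, a minimal client-side sketch follows. It is illustrative and not part of the commit: it assumes the app is started locally with `uvicorn main2:app --port 8000 --reload` (per the comment at the bottom of the file), and the base URL, the `requests` dependency, and the sample question are all assumptions.

# Smoke test for the updated endpoints (illustrative, not in the commit).
# Assumes the server was started with: uvicorn main2:app --port 8000 --reload
import requests

BASE_URL = "http://127.0.0.1:8000"  # assumed local address; adjust for a deployed Space

# New root route added in this commit
print(requests.get(f"{BASE_URL}/").json())  # expected: {"Hello": "World"}

# /predict chains RAKE keywords -> Wikipedia search -> BM25 ranking -> DistilBERT QA
resp = requests.get(f"{BASE_URL}/predict", params={"search_string": "capital city of France"})
print(resp.json())  # expected keys: "answer 1", "answer 2", "answer 3" (or "error" on failure)

Note that `qna()` loads the DistilBERT tokenizer and model on every request, so the first call also downloads the weights and can take a while; later calls reuse the local Hugging Face cache.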