Spaces:
Runtime error
Runtime error
Walid Aissa committed on
Commit ·
80e614a
1
Parent(s): 064fc00
better wikipedia search
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import os
|
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
| 4 |
import wikipediaapi as wk
|
|
|
|
| 5 |
from transformers import (
|
| 6 |
TokenClassificationPipeline,
|
| 7 |
AutoModelForTokenClassification,
|
|
@@ -11,7 +12,7 @@ from transformers import (
|
|
| 11 |
)
|
| 12 |
from transformers.pipelines import AggregationStrategy
|
| 13 |
import torch
|
| 14 |
-
|
| 15 |
# =====[ DEFINE PIPELINE ]===== #
|
| 16 |
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
|
| 17 |
def __init__(self, model, *args, **kwargs):
|
|
@@ -43,26 +44,36 @@ def keyphrases_extraction(text: str) -> str:
|
|
| 43 |
def wikipedia_search(input: str) -> str:
|
| 44 |
input = input.replace("\n", " ")
|
| 45 |
keyphrases = keyphrases_extraction(input)
|
|
|
|
| 46 |
wiki = wk.Wikipedia('en')
|
| 47 |
|
| 48 |
try :
|
| 49 |
#TODO: add better extraction and search
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
|
|
|
|
|
|
| 53 |
while not ('.' in page.summary) or not page.exists():
|
| 54 |
-
|
| 55 |
-
if
|
| 56 |
raise Exception
|
| 57 |
-
page = wiki.page(
|
| 58 |
-
return
|
|
|
|
| 59 |
except:
|
| 60 |
return "I cannot answer this question"
|
| 61 |
|
| 62 |
def answer_question(question):
|
| 63 |
|
| 64 |
context = wikipedia_search(question)
|
| 65 |
-
if context == "I cannot answer this question":
|
| 66 |
return context
|
| 67 |
|
| 68 |
# ======== Tokenize ========
|
|
@@ -99,6 +110,8 @@ def answer_question(question):
|
|
| 99 |
|
| 100 |
start_scores = outputs.start_logits
|
| 101 |
end_scores = outputs.end_logits
|
|
|
|
|
|
|
| 102 |
|
| 103 |
# ======== Reconstruct Answer ========
|
| 104 |
# Find the tokens with the highest `start` and `end` scores.
|
|
@@ -130,7 +143,7 @@ examples = [
|
|
| 130 |
["Where is the Eiffel Tower?"],
|
| 131 |
["What is the population of France?"]
|
| 132 |
]
|
| 133 |
-
|
| 134 |
demo = gr.Interface(
|
| 135 |
title = title,
|
| 136 |
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
| 4 |
import wikipediaapi as wk
|
| 5 |
+
import wikipedia
|
| 6 |
from transformers import (
|
| 7 |
TokenClassificationPipeline,
|
| 8 |
AutoModelForTokenClassification,
|
|
|
|
| 12 |
)
|
| 13 |
from transformers.pipelines import AggregationStrategy
|
| 14 |
import torch
|
| 15 |
+
print("hello")
|
| 16 |
# =====[ DEFINE PIPELINE ]===== #
|
| 17 |
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
|
| 18 |
def __init__(self, model, *args, **kwargs):
|
|
|
|
| 44 |
def wikipedia_search(input: str) -> str:
|
| 45 |
input = input.replace("\n", " ")
|
| 46 |
keyphrases = keyphrases_extraction(input)
|
| 47 |
+
|
| 48 |
wiki = wk.Wikipedia('en')
|
| 49 |
|
| 50 |
try :
|
| 51 |
#TODO: add better extraction and search
|
| 52 |
+
if len(keyphrases) == 0:
|
| 53 |
+
return "Can you add more details to your question?"
|
| 54 |
+
|
| 55 |
+
query_suggestion = wikipedia.suggest(keyphrases[0])
|
| 56 |
+
if(query_suggestion != None):
|
| 57 |
+
results = wikipedia.search(query_suggestion)
|
| 58 |
+
else:
|
| 59 |
+
results = wikipedia.search(keyphrases[0])
|
| 60 |
|
| 61 |
+
index = 0
|
| 62 |
+
page = wiki.page(results[index])
|
| 63 |
while not ('.' in page.summary) or not page.exists():
|
| 64 |
+
index += 1
|
| 65 |
+
if index == len(results):
|
| 66 |
raise Exception
|
| 67 |
+
page = wiki.page(results[index])
|
| 68 |
+
return page.summary
|
| 69 |
+
|
| 70 |
except:
|
| 71 |
return "I cannot answer this question"
|
| 72 |
|
| 73 |
def answer_question(question):
|
| 74 |
|
| 75 |
context = wikipedia_search(question)
|
| 76 |
+
if (context == "I cannot answer this question") or (context == "Can you add more details to your question?"):
|
| 77 |
return context
|
| 78 |
|
| 79 |
# ======== Tokenize ========
|
|
|
|
| 110 |
|
| 111 |
start_scores = outputs.start_logits
|
| 112 |
end_scores = outputs.end_logits
|
| 113 |
+
print(start_scores)
|
| 114 |
+
print(end_scores)
|
| 115 |
|
| 116 |
# ======== Reconstruct Answer ========
|
| 117 |
# Find the tokens with the highest `start` and `end` scores.
|
|
|
|
| 143 |
["Where is the Eiffel Tower?"],
|
| 144 |
["What is the population of France?"]
|
| 145 |
]
|
| 146 |
+
print("hello")
|
| 147 |
demo = gr.Interface(
|
| 148 |
title = title,
|
| 149 |
|