Spaces:

alpcansoydas
/

irrelevant-content-detection

Sleeping

App Files Files Community

alpcansoydas committed on Aug 7, 2024

Commit

4a0ad23

verified ·

1 Parent(s): 00f486b

Upload 3 files

Browse files

Files changed (3) hide show

README.md +6 -6
app.py +110 -0
requirements.txt +5 -0

README.md CHANGED Viewed

@@ -1,13 +1,13 @@
 ---
-title: Irrelevant Content Detection
-emoji: ⚡
-colorFrom: gray
-colorTo: indigo
 sdk: gradio
 sdk_version: 4.40.0
 app_file: app.py
 pinned: false
-license: apache-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Topic Detection
+emoji: 🐨
+colorFrom: pink
+colorTo: blue
 sdk: gradio
 sdk_version: 4.40.0
 app_file: app.py
 pinned: false
+license: mit
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import gradio as gr
+from langchain.prompts import PromptTemplate
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain_core.output_parsers import JsonOutputParser
+from langdetect import detect
+import time
+# Initialize the LLM and other components
+# Module-level HuggingFaceEndpoint instance; classify_text() sends its prompt
+# through this object.
+# NOTE(review): temperature=0.7 is passed together with do_sample=False. With
+# sampling disabled the temperature presumably has no effect -- confirm which
+# decoding mode is actually intended.
+llm = HuggingFaceEndpoint(
+    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
+    task="text-generation",
+    max_new_tokens=128,
+    temperature=0.7,
+    do_sample=False,
+)
+# Prompt used by classify_text(). {TEXT} is the only placeholder; the doubled
+# braces in the example line are escapes so the JSON sample survives formatting.
+template_classify = '''
+You are a topic detector bot. Your task is to determine the main topic of given text phrase.
+Answer general main topic not specific words.
+Your answer does not contain specific information from given text.
+Answer just one general main topic. Do not answer two or more topic.
+Answer shortly with two or three word phrase. Do not answer with long sentence.
+If you do not know the topic just answer as General.
+What is the main topic of given text?:
+<text>
+{TEXT}
+</text>
+convert it to json format using 'Answer' as key and return it.
+Your final response MUST contain only the response, no other text.
+Example:
+{{"Answer":["General"]}}
+'''
+# The bare string literal below is not assigned to any name: it wraps a
+# disabled second-pass "template_json" prompt, so nothing in it is executed.
+"""
+template_json = '''
+Your task is to read the following text, convert it to json format using 'Answer' as key and return it.
+<text>
+{RESPONSE}
+</text>
+Your final response MUST contain only the response, no other text.
+Example:
+{{"Answer":["General"]}}
+'''
+"""
+# Parser that classify_text() applies to the raw LLM response.
+json_output_parser = JsonOutputParser()
+# Define the classify_text function
+def classify_text(text):
+    global llm
+    start = time.time()
+    lang = detect(text)
+    language_map = {"tr": "turkish",
+                    "en": "english",
+                    "ar": "arabic",
+                    "es": "spanish",
+                    "it": "italian",
+                    }
+    try:
+        lang = language_map[lang]
+    except:
+        lang = "en"
+    prompt_classify = PromptTemplate(
+        template=template_classify,
+        input_variables=["LANG", "TEXT"]
+    )
+    formatted_prompt = prompt_classify.format(TEXT=text, LANG=lang)
+    classify = llm.invoke(formatted_prompt)
+    '''
+    prompt_json = PromptTemplate(
+        template=template_json,
+        input_variables=["RESPONSE"]
+    )
+    '''
+    #formatted_prompt = template_json.format(RESPONSE=classify)
+    #response = llm.invoke(formatted_prompt)
+    parsed_output = json_output_parser.parse(classify)
+    end = time.time()
+    duration = end - start
+    return parsed_output, duration #['Answer']
+# Create the Gradio interface
+def gradio_app(text):
+    classification, time_taken = classify_text(text)
+    return classification, f"Time taken: {time_taken:.2f} seconds"
+def create_gradio_interface():
+    with gr.Blocks() as iface:
+        text_input = gr.Textbox(label="Text")
+        output_text = gr.Textbox(label="Detected Topics")
+        time_taken = gr.Textbox(label="Time Taken (seconds)")
+        submit_btn = gr.Button("Detect topic")
+        submit_btn.click(fn=classify_text, inputs=text_input, outputs=[output_text, time_taken])
+    iface.launch()
+# Build and launch the UI only when this file is run as a script, not on import.
+if __name__ == "__main__":
+    create_gradio_interface()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+langchain==0.2.1
+langchain-community==0.2.1
+langchain-huggingface==0.0.3
+langdetect
+sentencepiece