SureRAG

Runtime error

App Files Files Community

Tonic committed on Feb 13, 2024

Commit

e62dd99

verified ·

1 Parent(s): bb6e5e5

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -9

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import os
 import json
 import time
 import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
 hf_token = os.getenv("HF_AUTH_TOKEN")
@@ -156,21 +157,51 @@ def query_vectara(text):
     else:
         return f"Error: {response.status_code}"
-# Main function: query Vectara, feed its result to OLMo text generation, then run the hallucination check
-def evaluate_content(user_input):
-    vectara_summary = query_vectara(user_input)  # return value of query_vectara is passed on unparsed
-    olmo_output = generate_text(vectara_summary)
-    hallucination_score = check_hallucination(olmo_output, vectara_summary)
-    return olmo_output, hallucination_score  # two values, while the interface below lists four output boxes
-# Create the Gradio interface
 iface = gr.Interface(
     fn=evaluate_content,
     inputs=[gr.Textbox(label="User Input")],
     outputs=[
-        gr.Textbox(label="Vectara Summary"),
         gr.Textbox(label="Vectara Sources", lines=10),
-        gr.Textbox(label="Generated Text"),
         gr.Textbox(label="Hallucination Score")
     ],
     live=False,

 import json
 import time
 import transformers
+import re
 from transformers import AutoTokenizer, AutoModelForCausalLM
 hf_token = os.getenv("HF_AUTH_TOKEN")
     else:
         return f"Error: {response.status_code}"
+def clean_text(text):
+    # Return `text` with every character that is neither a word character (\w) nor whitespace (\s) removed
+    cleaned_text = re.sub(r'[^\w\s]', '', text)  # Drops punctuation/symbols; letters, digits, underscores and all whitespace are kept
+    return cleaned_text
+def evaluate_content(user_input):  # Gradio callback: query Vectara, generate text from the summary, score it; returns four values
+    vectara_response = query_vectara(user_input)
+    vectara_response_json = json.loads(vectara_response)  # NOTE(review): query_vectara's error branch returns a plain "Error: <status>" string, which json.loads rejects; confirm the success branch returns JSON text
+    summary = vectara_response_json.get("summary", "")  # Missing keys fall back to "" here and [] on the next line
+    sources = vectara_response_json.get("sources", [])
+    # Strip punctuation/symbols from the summary via clean_text
+    summary_clean = clean_text(summary)
+    # Build one "Title: ..., Author: ..., Page: ..." line per source
+    sources_info = ""
+    for source in sources:
+        title = source.get("title", "No title")
+        author = source.get("author", "No author")
+        page_number = source.get("page number", "N/A")  # NOTE(review): key contains a space; confirm it matches the response schema
+        # Title and author are cleaned; the page number is inserted as-is
+        title_clean = clean_text(title)
+        author_clean = clean_text(author)
+        sources_info += f"Title: {title_clean}, Author: {author_clean}, Page: {page_number}\n"
+    # Generate text from the cleaned summary, then clean the generated text that is returned
+    olmo_output = generate_text(summary_clean)
+    olmo_output_clean = clean_text(olmo_output)
+    # Hallucination is scored on the uncleaned output and uncleaned summary, not on the cleaned strings returned below
+    hallucination_score = check_hallucination(olmo_output, summary)
+    return summary_clean, sources_info, olmo_output_clean, hallucination_score  # Order: summary, sources, generated text, score
+# Adjust the Gradio interface outputs to match the new structure
 iface = gr.Interface(
     fn=evaluate_content,
     inputs=[gr.Textbox(label="User Input")],
     outputs=[
+        gr.Textbox(label="Vectara Summary", lines=10),
         gr.Textbox(label="Vectara Sources", lines=10),
+        gr.Textbox(label="Generated Text", lines=10),
         gr.Textbox(label="Hallucination Score")
     ],
     live=False,