Spaces:
Runtime error
Runtime error
# Prerequisite (run in a shell, not in this module): pip install "gradio[mcp]"
import base64
import json
import os
import tempfile

import anthropic
import fitz  # PyMuPDF
import gradio as gr
# It's recommended to load the API key from secrets when deploying.
# For Hugging Face Spaces, you would set this as a secret in your Space settings.
try:
    # `userdata` is not imported anywhere in this file; it only exists when a
    # notebook runtime has placed it in scope. Everywhere else this lookup
    # raises NameError and the environment variable below is used instead.
    ANTHROPIC_API_KEY = userdata.get('ANTHROPIC_API_KEY')
except Exception:
    # Deliberate best-effort fallback: any failure of the notebook lookup
    # (name not defined, secret missing, access refused) ends up here.
    # `Exception` instead of a bare `except` so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY')

# Module-level client shared by get_anthropic_response().
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
| # Helper Functions from the notebook | |
def visualize_raw_response(response):
    """Serialize the text blocks of a response to pretty-printed JSON.

    Only content blocks whose ``type`` is ``"text"`` are kept. Each one is
    emitted as ``{"type": "text", "text": ...}``; when the block carries a
    non-empty ``citations`` attribute, the attribute dict of every citation
    (``vars(citation)``) is added under ``"citations"``.

    Args:
        response: Object exposing a ``content`` iterable of blocks.

    Returns:
        A JSON string (2-space indent) of the form ``{"content": [...]}``.
    """
    blocks = []
    for part in response.content:
        if part.type != "text":
            continue
        entry = {"type": "text", "text": part.text}
        cited = getattr(part, 'citations', None)
        if cited:
            entry["citations"] = [vars(item) for item in cited]
        blocks.append(entry)
    return json.dumps({"content": blocks}, indent=2)
def format_citations(response):
    """Render a response as text with numbered citation markers and a source list.

    The text of every ``"text"`` content block is concatenated in order. After
    each block's text, one ``" [n]"`` marker is appended per citation on that
    block. Citations are numbered in order of first appearance and identified
    by ``"<document title>:<whitespace-normalized cited text>"``, so the same
    quote from the same document always reuses its number.

    Args:
        response: Object exposing a ``content`` iterable of blocks, or a falsy
            value.

    Returns:
        ``""`` when ``response`` is falsy; otherwise the marked-up text,
        a blank line, and one ``[n] "<cited text>" found in "<title>"`` line
        per distinct citation.
    """
    if not response:
        return ""

    def _start_position(citation):
        # Use the first truthy start field among char/page/block. The trailing
        # `or 0` guarantees an int even when a field is present but None, so
        # sorting never compares None with int.
        return (
            getattr(citation, 'start_char_index', None)
            or getattr(citation, 'start_page_number', None)
            or getattr(citation, 'start_block_index', None)
            or 0
        )

    numbers = {}   # citation key -> assigned number
    sources = []   # numbered source lines, in order of first appearance
    pieces = []    # text fragments, joined once at the end
    for content in response.content:
        if content.type != "text":
            continue
        pieces.append(content.text)
        citations = getattr(content, 'citations', None)
        if not citations:
            continue
        for citation in sorted(citations, key=_start_position):
            doc_title = citation.document_title
            # split() with no argument already splits on every whitespace
            # character (including \n and \r), collapsing runs.
            cited_text = ' '.join(citation.cited_text.split())
            citation_key = f"{doc_title}:{cited_text}"
            if citation_key not in numbers:
                numbers[citation_key] = len(numbers) + 1
                sources.append(
                    f"[{numbers[citation_key]}] \"{cited_text}\" found in \"{doc_title}\""
                )
            pieces.append(f" [{numbers[citation_key]}]")
    return "".join(pieces) + "\n\n" + "\n".join(sources)
def process_documents(doc_type, file_paths):
    """Turn files on disk into citation-enabled document blocks.

    Every file is read in binary mode. Depending on ``doc_type`` the bytes are
    wrapped as:

    * ``'Plain Text'``: UTF-8 decoded text source,
    * ``'PDF'``: base64-encoded ``application/pdf`` source,
    * ``'Custom Content'``: a content source holding one UTF-8 text block.

    Any other ``doc_type`` yields no block for the file (the file is still
    read). Each block is titled with the file's base name.

    Args:
        doc_type: One of the three labels above.
        file_paths: Iterable of file paths; may be empty or ``None``.

    Returns:
        A list of document dicts, empty when there is nothing to process.
    """
    if not file_paths:
        return []

    def _build_source(raw):
        # Returns None for an unrecognized document type.
        if doc_type == 'Plain Text':
            return {"type": "text", "media_type": "text/plain", "data": raw.decode('utf-8')}
        if doc_type == 'PDF':
            return {"type": "base64", "media_type": "application/pdf", "data": base64.b64encode(raw).decode('utf-8')}
        if doc_type == 'Custom Content':
            return {"type": "content", "content": [{"type": "text", "text": raw.decode('utf-8')}]}
        return None

    collected = []
    for path in file_paths:
        with open(path, 'rb') as handle:
            raw = handle.read()
        source = _build_source(raw)
        if source is None:
            continue
        collected.append({
            "type": "document",
            "source": source,
            "title": os.path.basename(path),
            "citations": {"enabled": True},
        })
    return collected
def get_anthropic_response(documents, question):
    """Send the documents plus a question to the model and return the reply.

    The request is a single user message whose content is the document blocks
    followed by one text block holding the question. It is issued through the
    module-level ``client``.

    Args:
        documents: List of document blocks.
        question: The question text.

    Returns:
        The response object from ``client.messages.create``, or ``None`` when
        either argument is empty or when the call raises (the error is
        printed, not re-raised).
    """
    if not (documents and question):
        return None
    try:
        user_content = documents + [{"type": "text", "text": question}]
        return client.messages.create(
            model="claude-3-5-sonnet-latest",
            temperature=0.0,
            max_tokens=1024,
            messages=[{"role": "user", "content": user_content}],
        )
    except Exception as e:
        print(f"An error occurred: {e}")
    return None
def highlight_pdf(response, pdf_path):
    """Highlight the passages cited by a response inside a copy of a PDF.

    Only citations whose ``type`` is ``"page_location"`` are used. For each,
    the cited text (with ``\\u0002`` characters removed and surrounding
    whitespace stripped) is searched on the cited pages and every hit gets a
    yellow highlight annotation.

    Args:
        response: Object exposing a ``content`` iterable of blocks, or a falsy
            value.
        pdf_path: Path of the PDF to open.

    Returns:
        Path of the newly written highlighted PDF, or ``None`` when
        ``response`` is falsy or carries no page-location citations.
    """
    if not response:
        return None
    pdf_citations = [
        citation
        for content in response.content
        if getattr(content, 'citations', None)
        for citation in content.citations
        if citation.type == "page_location"
    ]
    if not pdf_citations:
        return None

    doc = fitz.open(pdf_path)
    try:
        page_count = len(doc)
        for citation in pdf_citations:
            text_to_find = citation.cited_text.replace('\u0002', '').strip()
            if not text_to_find:
                # Nothing meaningful to search for.
                continue
            # Page numbers in page_location citations are 1-indexed and
            # end_page_number is exclusive, so the 0-based page range is
            # [start - 1, end - 1). Always visit at least the start page in
            # case a citation reports end == start.
            first_page = citation.start_page_number - 1
            stop_page = max(citation.end_page_number - 1, first_page + 1)
            for page_num in range(max(first_page, 0), min(stop_page, page_count)):
                page = doc[page_num]
                for inst in page.search_for(text_to_find):
                    highlight = page.add_highlight_annot(inst)
                    highlight.set_colors({"stroke": (1, 1, 0)})
                    highlight.update()
        # Write to a unique temporary file instead of a fixed name in the
        # working directory, so concurrent requests cannot overwrite (or be
        # served) each other's output.
        with tempfile.NamedTemporaryFile(delete=False, suffix="_highlighted.pdf") as tmp:
            output_pdf_path = tmp.name
        doc.save(output_pdf_path)
    finally:
        # Release the document even if searching or saving fails.
        doc.close()
    return output_pdf_path
def annotate_pdf(pdf_path, annotation_text, page_number):
    """Write a red text note onto one page of a PDF and save it as a new file.

    The note is placed in the fixed rectangle ``(50, 50, 400, 100)`` at font
    size 12. The result is saved next to the input with ``_annotated``
    inserted before the file extension.

    Args:
        pdf_path: Path of the PDF to annotate.
        annotation_text: Text of the note.
        page_number: 1-based page number to annotate.

    Returns:
        Path of the annotated PDF, or ``None`` when ``pdf_path`` is empty or
        does not exist, or when ``page_number`` is outside the document.
    """
    if not pdf_path or not os.path.exists(pdf_path):
        return None

    # splitext only touches the real extension. str.replace(".pdf", ...) would
    # rewrite every ".pdf" occurrence in the path and, for ".PDF" or
    # extension-less names, produce an output path equal to the input path.
    root, ext = os.path.splitext(pdf_path)
    output_pdf_path = f"{root}_annotated{ext}"

    page_index = page_number - 1
    doc = fitz.open(pdf_path)
    try:
        if not 0 <= page_index < len(doc):
            return None
        page = doc[page_index]
        rect = fitz.Rect(50, 50, 400, 100)
        page.insert_textbox(rect, annotation_text, fontsize=12, color=(1, 0, 0))
        doc.save(output_pdf_path)
    finally:
        # Release the document on every path, including errors.
        doc.close()
    return output_pdf_path
def _status_only_outputs(message):
    """Build the 8-value result carrying just a status message and empty outputs."""
    return message, {}, None, None, None, None, None, None


def process_and_display(doc_type, question, files, load_samples, annotation_text, annotation_page):
    """Run the full pipeline for one interface submission.

    Steps: collect the uploaded file paths, build document blocks, query the
    model, format the answer, and for PDFs highlight the cited passages and
    optionally add a text annotation. The formatted answer and the raw JSON
    are also written to temporary files for download.

    Args:
        doc_type: ``'Plain Text'``, ``'PDF'`` or ``'Custom Content'``.
        question: Question to ask about the documents.
        files: Uploaded files, as path strings or objects with a ``name``
            attribute; may be ``None``.
        load_samples: When truthy, the sample-data branch is taken. No sample
            files are configured, so this currently ends with the
            "please upload" message.
        annotation_text: Optional note to write onto the PDF.
        annotation_page: 1-based page number for the note.

    Returns:
        An 8-tuple: formatted response text, raw response as a dict,
        highlighted PDF path, original PDF path, path of the formatted-response
        text file, path of the raw-response JSON file, and the final PDF path
        twice (annotated if available, else highlighted). On failure the first
        item is a status message, the second is ``{}`` and the rest are
        ``None``.
    """
    file_names = []
    if load_samples:
        # This part needs to be adapted for a deployed environment
        # as it relies on a local 'data' directory structure.
        # For deployment, you'd package these files with your app.
        question = "Sample question"
        file_names = []  # Add paths to sample files here
    elif files:
        # Depending on the Gradio version/configuration, uploads arrive as
        # plain path strings or as file objects exposing `.name`.
        file_names = [getattr(f, "name", f) for f in files]

    if not file_names:
        return _status_only_outputs("Please upload documents or load sample data.")
    # Report missing inputs for what they are instead of letting them surface
    # later as a misleading API failure.
    if not doc_type:
        return _status_only_outputs("Please select a document type.")
    if not question or not question.strip():
        return _status_only_outputs("Please enter a question.")

    # Only the first uploaded PDF is highlighted/annotated.
    original_pdf_path = file_names[0] if doc_type == 'PDF' else None

    documents = process_documents(doc_type, file_names)
    response = get_anthropic_response(documents, question)
    if not response:
        return _status_only_outputs("Failed to get response from API.")

    formatted_response = format_citations(response)
    raw_response_json_str = visualize_raw_response(response)
    raw_response_json = json.loads(raw_response_json_str)

    highlighted_pdf_path = None
    annotated_pdf_path = None
    if doc_type == 'PDF':
        highlighted_pdf_path = highlight_pdf(response, original_pdf_path)
        if annotation_text and annotation_page:
            # Annotate on top of the highlights when there are any.
            pdf_to_annotate = highlighted_pdf_path or original_pdf_path
            if pdf_to_annotate:
                annotated_pdf_path = annotate_pdf(pdf_to_annotate, annotation_text, int(annotation_page))

    # delete=False: the files must outlive this function so they can be served.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding='utf-8') as f:
        f.write(formatted_response)
        formatted_response_path = f.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding='utf-8') as f:
        f.write(raw_response_json_str)
        raw_response_path = f.name

    final_pdf_path = annotated_pdf_path or highlighted_pdf_path
    return (
        formatted_response,
        raw_response_json,
        highlighted_pdf_path,
        original_pdf_path,
        formatted_response_path,
        raw_response_path,
        final_pdf_path,
        final_pdf_path,
    )
| # Gradio Interface | |
# Input widgets, in the positional order of process_and_display's parameters.
input_components = [
    gr.Radio(['Plain Text', 'PDF', 'Custom Content'], label="Document Type"),
    gr.Textbox(lines=2, placeholder="Enter your question here...", label="Question"),
    gr.File(file_count="multiple", label="Upload Documents"),
    gr.Checkbox(label="Load Sample Data (requires data folder)"),
    gr.Textbox(lines=2, placeholder="Enter annotation text...", label="Annotation Text"),
    gr.Number(label="Annotation Page Number", precision=0),
]

# Output widgets, in the order of the 8-tuple returned by process_and_display.
output_components = [
    gr.Textbox(label="Formatted Response"),
    gr.JSON(label="Raw API Response"),
    gr.File(label="Highlighted PDF"),
    gr.File(label="Original PDF"),
    gr.File(label="Download Formatted Response"),
    gr.File(label="Download Raw Response"),
    gr.File(label="Download Highlighted PDF"),
    gr.File(label="Final Annotated PDF"),
]

iface = gr.Interface(
    fn=process_and_display,
    inputs=input_components,
    outputs=output_components,
    title="Anthropic Citations API Explorer",
    description="Explore Anthropic's citation capabilities. Upload documents, ask questions, see cited responses, and add your own annotations.",
)

# Start the web UI only when executed as a script.
if __name__ == "__main__":
    iface.launch()